diff --git a/doc/formats.html b/doc/formats.html
index 3adf793..b2e97c4 100644
--- a/doc/formats.html
+++ b/doc/formats.html
@@ -367,7 +367,7 @@ endian, i.e. least significant byte first.
With the 0
flag, the value is unsigned, otherwise signed.
-In output, the prec (or sizeof(long) whatever is less) least
+In output, the precision (or sizeof(long), whichever is less) least
significant bytes of the value are sign extended or zero extended
(depending on the 0
flag) to width bytes.
"abcdefg%<xor>"
the checksum is calculated
from abcdefg
,
but in "abcdefg%2.1<xor>"
only from cdef
.
@@ -534,35 +534,38 @@ This input-only format matches Perl compatible regular expressions (PCRE).
It is only available if a PCRE library is installed.
+
If PCRE is not available for your host or cross architecture, download
the sourcecode from www.pcre.org
and try my EPICS compatible Makefile
-to compile it like a normal EPICS application.
+to compile it like a normal EPICS support module.
The Makefile is known to work with EPICS 3.14.8 and PCRE 7.2.
In your RELEASE file define the variable PCRE
so that
it points to the install location of PCRE.
-If PCRE is already installed on your system, use the variables
-PCRE_INCLUDE
and PCRE_LIB
instead to provide
-the install directories of pcre.h
and the library.
-
-If you have PCRE installed in different locations for different (cross)
-architectures, define the variables in RELEASE.Common.<architecture>
-instead of the global RELEASE file.
+If PCRE is already installed on (some of) your systems, you may add
+architectures where PCRE can be found in standard include and library
+locations to the variable WITH_SYSTEM_PCRE
.
+If either the header file or the library is in a non-standard place,
+set in your RELEASE file the variables PCRE_INCLUDE_arch
+and/or PCRE_LIB_arch
for the respective architectures
+to the correct directories or set
+PCRE_INCLUDE
and/or PCRE_LIB
+in architecture specific RELEASE.Common.arch files.
If the regular expression is not anchored, i.e. does not start with
^
, leading non-matching input is skipped.
A maximum of width bytes is matched, if specified.
-If prec is given, it specifies the sub-expression whose match
+If precision is given, it specifies the sub-expression whose match
is returned.
Otherwise the complete match is returned.
In any case, the complete match is consumed from the input buffer.
-If the expression contains a /
it must be escaped.
+If the expression contains a /
it must be escaped like \/
.
Example: %.1/<title>(.*)<\/title>/
returns
@@ -579,48 +582,63 @@ it can be used as a pre-processor for input or
as a post-processor for output.
-Any match of the regex is replaced by the string subst with any
-&
or \0
in subst replaced with the match itself and any
-\1
through \9
with the corresponding sub-expressions.
-To get a literal &
or \
in the substitution write
-\&
or \\
.
+Matches of the regex are replaced by the string subst with all
+&
or \0
in subst replaced with the match itself and all
+\1
through \9
replaced with the match of the corresponding sub-expression.
+To get a literal &
or \
or /
in the substitution write
+\&
or \\
or \/
.
+There is no way to specify literal bytes with values less than or equal to 9 in the
+substitution!
If width is specified, it limits the number of characters processed.
If the -
flag is used (i.e. width looks like a negative number)
-only the last width caracters are processed, else the first.
-Without width all available characters are processed.
+only the last width characters are processed, else the first.
+Without width (or 0) all available characters are processed.
-If prec is specified, it limits the number of times the substitution is applied.
-Without prec, the substitution is applied as often as possible.
+If precision is specified, it indicates which matches to replace.
+With the +
flag given, precision is the maximum
+number of matches to replace.
+Otherwise precision is the index (counting from 1) of the match to replace.
+Without precision (or 0), all matches are replaced.
In input this converter pre-processes data received from the device before -other converters after this one read it. -Converters before this one will see unmodified input. +following converters read it. +Converters preceding this one will read unmodified input. Thus place this converter before those whose input should be pre-processed.
-In output it post-processes data already formatted by other converters before this one +In output it post-processes data already formatted by preceding converters before sending it to the device. -Converters after this one will send their output unmodified. +Converters following this one will send their output unmodified. Thus place this converter after those whose output should be post-processed.
-Examples:
-%#-10.2/ab/X/
replaces the string ab
with X
+Examples:
+
%#+-10.2/ab/X/
replaces the string ab
with X
at most 2 times in the last 10 characters.
-(abcabcabcabc
becomes abcXcXcabc
)%#/..\B/&:/
writes :
after every second character
+(abcabcabcabc
becomes abcXcXcabc
)
+%#/\\/\//
replaces all \
with /
+(\dir\file
becomes /dir/file
)
+%#/..\B/&:/
inserts :
after every second character
which is not at the end of a word.
-(0b19353134
becomes 0b:19:35:31:34
)%#/://
removes all :
.
-(0b:19:35:31:34
becomes 0b19353134
)0b19353134
becomes 0b:19:35:31:34
)
+%#/://
removes all :
characters.
+(0b:19:35:31:34
becomes 0b19353134
)
+%#/([^+-]*)([+-])/\2\1/
moves a postfix sign to the front.
(1.23-
becomes -1.23
)%m
)@@ -679,7 +697,7 @@ In output, the system function strftime() is used to format the time. There may be differences in the implementation between operating systems.
-In input, StreamDevice used its own implementation because many +In input, StreamDevice uses its own implementation because many systems are missing the strptime() function and additional formats are supported.
diff --git a/doc/stream.css b/doc/stream.css index b83381b..a7974c9 100644 --- a/doc/stream.css +++ b/doc/stream.css @@ -88,6 +88,16 @@ code { text-align:left; } +.box { + margin-left:1ex; + margin-right:1ex; + margin-top:0.5ex; + padding: 0 1ex; + border: 1px solid black; + text-align:left; + background-color:#f0f0f0; +} + #navleft { position:fixed; left:0; diff --git a/src/RegexpConverter.cc b/src/RegexpConverter.cc index 3bc4df7..693ffd3 100644 --- a/src/RegexpConverter.cc +++ b/src/RegexpConverter.cc @@ -23,7 +23,7 @@ #include "string.h" #include "pcre.h" -// Perl regular expressions (PCRE) %/regexp/ +// Perl regular expressions (PCRE) %/regexp/ and %#/regexp/subst/ /* Notes: - Memory for compiled regexp is allocated in parse but never freed. @@ -65,15 +65,22 @@ parse(const StreamFormat& fmt, StreamBuffer& info, error("Missing closing '/' after %%/%s format conversion\n", pattern()); return false; } - if (*source == esc) { - source++; - pattern.append('\\'); - continue; + if (*source == esc) { // handle escaped chars + if (*++source != '/') // just un-escape / + { + pattern.append('\\'); + if ((*source & 0x7f) < 0x30) // handle control chars + { + pattern.print("x%02x", *source++); + continue; + } + // fall through for PCRE codes like \B + } } pattern.append(*source++); } source++; - debug("regexp = \"%s\"\n", pattern()); + debug("regexp = \"%s\"\n", pattern.expand()()); const char* errormsg; int eoffset; @@ -89,22 +96,19 @@ parse(const StreamFormat& fmt, StreamBuffer& info, if (fmt.flags & alt_flag) { StreamBuffer subst; + debug("check for subst in \"%s\"\n", StreamBuffer(source).expand()()); while (*source != '/') { if (!*source) { error("Missing closing '/' after %%#/%s/%s format conversion\n", pattern(), subst()); return false; } - if (*source == esc) { - source++; - subst.append('\\'); - if (*source <= 9) subst.append('0'+*source++); - continue; - } + if (*source == esc) + subst.append(*source++); subst.append(*source++); } source++; - debug("subst = 
\"%s\"\n", subst()); + debug("subst = \"%s\"\n", subst.expand()()); info.append(subst).append('\0'); return pseudo_format; } @@ -131,7 +135,7 @@ scanString(const StreamFormat& fmt, const char* input, debug("pcre_exec match \"%.*s\" result = %d\n", length, input, rc); if ((subexpr && rc <= subexpr) || rc < 0) { - /* error or no match or not enough sub-expressions */ + // error or no match or not enough sub-expressions return -1; } if (fmt.flags & skip_flag) return ovector[subexpr*2+1]; @@ -148,40 +152,41 @@ scanString(const StreamFormat& fmt, const char* input, } memcpy(value, input + ovector[subexpr*2], l); value[l] = '\0'; - return ovector[1]; /* consume input until end of match */; + return ovector[1]; // consume input until end of match } -static void regsubst(pcre* code, StreamBuffer& buffer, long start, long length, const char* subst, int max) +static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, long start) { - int rc, l, c, r, rl, n=0; + const char* subst = fmt.info; + pcre* code = extract