diff --git a/doc/nav.html b/doc/nav.html index 40d04de..11650bb 100644 --- a/doc/nav.html +++ b/doc/nav.html @@ -178,6 +178,7 @@ div div div a {list-style-type:circle;} Write more than one value in one message Read more than one value from one message Read values of mixed data type + Read a web page diff --git a/doc/tipsandtricks.html b/doc/tipsandtricks.html index d1391c3..1458479 100644 --- a/doc/tipsandtricks.html +++ b/doc/tipsandtricks.html @@ -103,7 +103,7 @@ an array: (3.14, 17.30, -12.34)

B) We have up to 12 numeric values

Use a calcout record and -field references in the format. +redirection to fields.

@@ -128,7 +128,7 @@ record (calcout, "$(RECORD)") {

C) Values are in other records on the same IOC

-Use record references in the format. +Use redirection to records.

@@ -216,7 +216,7 @@ Any non-matching input is ignored by record B.

C) Values should be stored in other records on the same IOC

-Use record references in the format. +Use redirection to records. To avoid record names in protocol files, use protocol arguments.

@@ -244,11 +244,11 @@ processes record B.

-

I have a device that sends mixed data types: numbers and strings

+

I have a device that sends mixed data types: numbers or strings

Use a @mismatch exception handler and -record references in the format. +redirection to records. To avoid record names in protocol files, use protocol arguments.

@@ -289,9 +289,124 @@ record (stringout, "$(DEVICE):clean_2") {
  field (VAL, "OK")
  field (OUT, "$(DEVICE):message PP")
}
-
+ +

I need to read a web page

+

+First you have to send a correctly formatted HTTP request. +Note that this request must contain the full URL like +"http://server/page" and must be terminated with two newlines. +The server should be the same as in the +drvAsynIPPortConfigure +command (if not using an HTTP proxy). + +The web page you get often contains much more information than you need. +Regular expressions are great +to find what you are looking for. +

+

Example 1

+

+Read the title of a web page. +

+

+ +get_title {
+  extrainput = ignore;
+  replyTimeout = 1000;
+  out "GET http://\$1\n\n";
+  in "%+.1/(?im)<title>(.*)<\/title>/";
+} +
+

+

+Terminate the request with two newlines, either explicit like here +or using an +outTerminator. +The URI (without http:// but including the web server host name) +is passed as argument 1 to \$1. +Note that web servers may be slow, so allow some +replyTimeout. +

+

+If you don't use an inTerminator then the whole page is +read as one "line" to the in command and can be parsed easily +with a regular expression. +We want to see the string between <title> and +</title>, so we put it into a subexpression in +() and request the first subexpression with .1. +Note that the / in the closing tag has to be escaped +to avoid a misinterpretation as the closing / of the regular +expression. +

+

+The tags may be upper or lower case like <TITLE> or +<Title>, so we ask for case insensitive matching with +(?i). +

+

+The string should be terminated with the first closing +</title>, not the last one in the file. +(There should not be more than one title but you never know.) +Thus we ask not to be greedy with (?m). +(?i) and (?m) can be combined to (?im). +See the PCRE documentation for more regexp syntax. +

+

+The regular expression matcher ignores and discards any content before the +matching section. +Content after the match is discarded with extrainput = ignore +so that it does not trigger errors reporting "surplus input". +

+

+Finally, the title may be too long for the record. +The + tells the format matcher not to fail in this case +but to truncate the string instead. +You can read the string with a stringin record or for longer strings with +a waveform record with data type CHAR. +

+

+ +record (stringin, "$(DEVICE):title") {
+  field (DTYP, "stream")
+  field (INP, "@$(DEVICETYPE).proto get_title($(PAGE)) $(BUS)")
+}
+record (waveform, "$(DEVICE):longtitle") {
+  field (DTYP, "stream")
+  field (INP, "@$(DEVICETYPE).proto get_title($(PAGE)) $(BUS)")
+  field (FTVL, "CHAR")
+  field (NELM, "100")
+}
+
+

+ +

Example 2

+

+Read a number from a web page. First we have to locate the number. +For that we match against any known string right before the number +(and discard the match with *). +Then we read the number. +

+ +get_title {
+  extrainput = ignore;
+  replyTimeout = 1000;
+  out "GET http://\$1\n\n";
+  in "%*/Interesting value:/%f more text";
+} +
+

+When using extrainput = ignore;, it is always a good idea to +match a few bytes after the value, too. +This catches errors where loading of the page is interrupted in the middle +of the number. (You don't want to miss the exponent from something like 1.23E-14.) +

+

+You can read more than one value from a file with successive regular expressions +and redirections. +But this only works if the order of the values is predictable. +StreamDevice is not an XML parser! It always reads sequentially. +


-

Dirk Zimoch, 2007

+

Dirk Zimoch, 2012