/* StrEdit.C - string editing routine of the SINQ library (C source). */
/*
** Module identification.  "ident" is the version string of this module;
** it is handed to the module directive of the compiler in use (selected
** by the VAXC / __DECC predefined macros).  On any other compiler only
** the macro definition remains.
*/
#define ident "1B03"
#ifdef VAXC
#module StrEdit ident
#endif
#ifdef __DECC
#pragma module StrEdit ident
#endif
/*
** +--------------------------------------------------------------+
** |                   Paul Scherrer Institute                    |
** |                        Department ASQ                        |
** |                                                              |
** | This software may be used freely by non-profit organizations.|
** | It may be copied provided that the name of P.S.I. and of the |
** | author is included. Neither P.S.I. nor the author assume any |
** | responsibility for the use of this software outside of P.S.I.|
** +--------------------------------------------------------------+
**
** Module Name . . . . . . . . : [...LIB.SINQ]StrEdit.C
**
** Author  . . . . . . . . . . : D. Maden
** Date of creation  . . . . . : Jan 1996
**
** To compile this module, use:

      $ import tasmad
      $ define/group sinq_c_tlb mad_lib:sinq_c.tlb
      $ cc /debug /noopt /obj=[]StrEdit -
            tasmad_disk:[mad.lib.sinq]StrEdit +
            sinq_c_tlb/lib

** To include this module in SINQ.OLB, use:

      $ import tasmad
      $ define/group sinq_c_tlb mad_lib:sinq_c.tlb
      $
      $ define/group sinq_olb mad_lib:sinq_dbg.olb
      $ @tasmad_disk:[mad.lib.sinq]sinq_olb StrEdit debug
      $
      $ define/group sinq_olb mad_lib:sinq.olb
      $ @tasmad_disk:[mad.lib.sinq]sinq_olb StrEdit
**
** Updates:
**  1A01 19-Jan-1996 DM. Initial version.
**  1B01 21-Mar-1996 DM. Move from DELTAT.OLB to SINQ.OLB.
**============================================================================
** The following entry points are included in this module:
**
**-------------------------------------------------------------------------
**  #include <sinq_prototypes.h>
**
**  char *StrEdit (char *out, char *in, char *ctrl, int *ln)
**        -------
**  Input Args:
**    in      - the string to be edited.
**    ctrl    - the string specifying what is to be done. See Description
**              below.
**  Output Args:
**    out     - the edited string. The maximum size of this string must
**              be specified as input parameter *ln. The string
**              will be zero terminated on return.
**  Modified Args:
**    *ln     - an integer specifying, on input, the length of "out" in
**              bytes. This must include room for the zero termination.
**              On return, ln will be set to the number of characters
**              copied to "out" (not counting the zero termination byte).
**  Return value:
**    If an error is detected, the return value is a NULL pointer. Otherwise
**    it is a pointer to the resulting string (i.e. "out").
**  Global variables:
**    none
**  Routines called:
**    none
**  Description:
**    StrEdit (out, in, ctrl, ln) - This routine is intended to mimic the
**              OpenVMS DCL lexical function F$EDIT.
**
**              It first processes the string "in" to convert any C-style
**              escape sequences introduced by a '\' character. Recognised
**              escape sequences are:
**                  \a    --> \007  BEL
**                  \b    --> \010  BS  (backspace)
**                  \f    --> \014  FF  (formfeed)
**                  \n    --> \012  LF  (linefeed)
**                  \r    --> \015  CR  (carriage return)
**                  \t    --> \011  HT  (horizontal tab)
**                  \v    --> \013  VT  (vertical tab)
**                  \\    --> \
**                  \'    --> '
**                  \"    --> "
**                  \?    --> ?
**                  \xhh  --> hh are an arbitrary number of hex digits.
**                  \nnn  --> nnn are up to 3 octal digits.
**              Any unrecognised escape sequence will be left unchanged.
**
**              The resulting string is then edited according to the
**              keywords specified in the control string "ctrl". The result
**              will be written to string "out". The "out" argument may be
**              the same as "in".
**
**              On entry, "ln" specifies the size of "out" in bytes, including
**              space for a null terminating byte. On return, it is set to the
**              length of the result (not counting the zero-terminator).
**
**              The following control strings are recognised:
**
**                COLLAPSE  - Removes all spaces and tabs from the string.
**                COMPRESS  - Replaces multiple spaces and tabs with a
**                            single space.
**                LOWERCASE - Makes the string lower case.
**                TRIM      - Removes leading and trailing spaces and tabs
**                            from the string.
**                UNCOMMENT - Removes comments from the string. A comment
**                            starts at the first '!' outside quotes and
**                            runs to the end of the string.
**                UPCASE    - Makes the string upper case.
**
**              All keywords must be specified in full. They may be separated
**              by white-space or commas and be in upper or lower case.
**
**              Non-escaped double quotes (") in the input string delimit
**              quoted substrings. The editing functions are not applied to
**              the text within such quotes. There must be an even number
**              of non-escaped quotes, and the quotes themselves are not
**              copied to the resulting string. Escaped double quotes (\"),
**              on the other hand, are treated as normal characters.
**
**  Return Status:
**    StrEdit returns a pointer to "out". If any errors are detected (e.g. an
**    odd number of quotes), string editing is abandoned and a null pointer
**    is returned.
**
**  Example:
**    strcpy (in, " asdfg \"hello there\" folks ");
**    len = sizeof (in);
**    printf ("\"%s\"\n", StrEdit (in, in, "trim upcase compress", &len));
**   will generate
**    "ASDFG hello there FOLKS"
**-------------------------------------------------------------------------
**                            Global Definitions
*/
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <sinq_prototypes.h>

/* The string terminator. */
#define NIL '\0'

/* Truth values for the int flags used in this module. */
#define True 1
#define False 0

/*
** QUOTE is the double-quote character with all of its low eight bits
** inverted, narrowed to char.  It is a byte value that is unlikely to
** occur in ordinary text and can therefore act as an in-band stand-in
** for a double quote.
*/
#define QUOTE ((char) (('\"' ^ 0xff) & 0xff))
/*
**====================================================================
*/
/*
**====================================================================
*/
/*--------------------------------------------------------------------------
**                         Global Variables
*/
/*
**---------------------------------------------------------------------------
**  StrEdit - edit a string in the manner of the DCL lexical F$EDIT.
**
**  Arguments:
**    out   - receives the zero-terminated result.  May be the same
**            buffer as "in": "in" is fully consumed before "out" is
**            written (except on an error, when "out" is set empty).
**    in    - the zero-terminated string to edit.
**    ctrl  - the editing keywords (collapse, compress, lowercase, trim,
**            uncomment, upcase), in any letter case, separated by
**            commas and/or white space.  At most 79 characters.
**    ln    - on entry, the size of "out" in bytes including the
**            terminator; on return, the length of the result.
**
**  Returns "out", or NULL on error (NULL argument, *ln < 1, ctrl too
**  long, unknown keyword, odd number of unescaped double quotes, or no
**  memory).  On error *ln is set to 0 and, provided "out" is usable,
**  "out" is set to the empty string.
**
**  Method:
**    1) "in" is decoded into a work buffer, translating '\' escape
**       sequences.  Decoding never lengthens the text, so a buffer of
**       strlen(in)+1 bytes suffices.  Unescaped double quotes are
**       recorded in a side table of flags rather than by an in-band
**       marker byte, so no byte value of the input is reserved.
**    2) The decoded text is processed segment by segment.  Quoted
**       segments are copied verbatim (without their quotes); the
**       collapse/compress, lowercase, upcase and uncomment edits are
**       applied to each unquoted segment separately, in that order.
**    3) The concatenated result is cut to fit "out"; then, if requested,
**       leading and trailing spaces and tabs are trimmed.  Note that
**       the trim acts on the complete result, so blanks at either end
**       are removed even when they came from a quoted segment.
**
**  A \0 (or \x0) escape yields a zero byte, which ends the decoded text
**  at that point.
*/
char *StrEdit(char *out, char *in, char *ctrl, int *ln)
{
    static const char separators[] = ", \t\f\v\n";
    static const char hex_digits[] = "0123456789abcdefABCDEF";

    char my_ctrl[80];           /* Lower-cased, writable copy of ctrl */
    char *tok, *tok_end, *tok_nxt, *bang;
    char *my_in = NULL;         /* Escape-decoded copy of "in" */
    char *is_quote = NULL;      /* is_quote[i] != 0: my_in[i] is an
                                ** unescaped double quote */
    char *my_tmp = NULL;        /* The unquoted segment being edited */
    char *my_out = NULL;        /* The edited segments, concatenated */
    size_t len, n, i, rd, wr, pos, end, seg_len, out_len, first;
    unsigned int value;
    int c, out_size, n_quotes, stop;
    int do_collapse = 0, do_compress = 0, do_lowercase = 0;
    int do_trim = 0, do_uncomment = 0, do_upcase = 0;

    if (ln == NULL) {
        return NULL;
    }
    out_size = *ln;
    if (out == NULL || out_size < 1) {  /* Can't do anything!! */
        *ln = 0;
        return NULL;
    }
    if (in == NULL) {
        goto fail;
    }
    if (*in == '\0') {                  /* Nothing to do!! */
        *out = '\0';
        *ln = 0;
        return out;
    }
    if (ctrl == NULL) {
        goto fail;
    }

    /*
    ** Work out what has to be done.  A private lower-cased copy of ctrl
    ** is split in place, so the caller's string is never modified.
    ** strspn/strcspn are used instead of strtok so that any strtok scan
    ** the caller has in progress is not disturbed.
    */
    len = strlen(ctrl);
    if (len >= sizeof my_ctrl) {
        goto fail;
    }
    for (i = 0; i <= len; i++) {
        my_ctrl[i] = (char) tolower((unsigned char) ctrl[i]);
    }

    tok = my_ctrl + strspn(my_ctrl, separators);
    while (*tok != '\0') {
        tok_end = tok + strcspn(tok, separators);
        tok_nxt = tok_end;
        if (*tok_end != '\0') {
            *tok_end = '\0';
            tok_nxt = tok_end + 1;
        }
        if (strcmp(tok, "collapse") == 0) {
            do_collapse = 1;
        } else if (strcmp(tok, "compress") == 0) {
            do_compress = 1;
        } else if (strcmp(tok, "lowercase") == 0) {
            do_lowercase = 1;
        } else if (strcmp(tok, "trim") == 0) {
            do_trim = 1;
        } else if (strcmp(tok, "uncomment") == 0) {
            do_uncomment = 1;
        } else if (strcmp(tok, "upcase") == 0) {
            do_upcase = 1;
        } else {
            goto fail;                  /* Illegal ctrl verb */
        }
        tok = tok_nxt + strspn(tok_nxt, separators);
    }

    /* Get the working space; each buffer can hold all of "in". */
    len = strlen(in) + 1;
    my_in = malloc(len);
    is_quote = calloc(len, 1);
    my_tmp = malloc(len);
    my_out = malloc(len);
    if (my_in == NULL || is_quote == NULL ||
        my_tmp == NULL || my_out == NULL) {
        goto fail;
    }

    /*
    ** Step 1: decode "in" into "my_in".
    */
    wr = 0;
    while (*in != '\0') {
        if (*in == '"') {               /* Unescaped quote: a delimiter */
            is_quote[wr] = 1;
            my_in[wr++] = *in++;
        } else if (*in != '\\') {       /* Ordinary character */
            my_in[wr++] = *in++;
        } else {
            in++;                       /* Skip the backslash */
            switch (*in) {
            case 'a':
            case 'A':
                my_in[wr++] = '\007';
                in++;
                break;
            case 'b':
            case 'B':
                my_in[wr++] = '\010';
                in++;
                break;
            case 'f':
            case 'F':
                my_in[wr++] = '\014';
                in++;
                break;
            case 'n':
            case 'N':
                my_in[wr++] = '\012';
                in++;
                break;
            case 'r':
            case 'R':
                my_in[wr++] = '\015';
                in++;
                break;
            case 't':
            case 'T':
                my_in[wr++] = '\011';
                in++;
                break;
            case 'v':
            case 'V':
                my_in[wr++] = '\013';
                in++;
                break;
            case '\\':
            case '\'':
            case '"':                   /* Escaped quote: ordinary char */
            case '?':
                my_in[wr++] = *in++;
                break;
            case 'x':
            case 'X':
                n = strspn(in + 1, hex_digits);
                if (n == 0) {           /* No digits: leave "\x" as it is */
                    my_in[wr++] = '\\';
                    my_in[wr++] = *in++;
                } else {
                    in++;               /* Skip the 'x' */
                    value = 0;
                    for (i = 0; i < n; i++) {
                        c = tolower((unsigned char) *in++);
                        /* Only the low 8 bits can be stored anyway */
                        value = ((value << 4) |
                                 (unsigned int) (isdigit(c) ? c - '0'
                                                            : c - 'a' + 10))
                                & 0xffu;
                    }
                    my_in[wr++] = (char) value;
                }
                break;
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
                value = 0;              /* Up to 3 octal digits */
                for (i = 0; i < 3 && *in >= '0' && *in <= '7'; i++) {
                    value = value * 8 + (unsigned int) (*in++ - '0');
                }
                my_in[wr++] = (char) (value & 0xffu);
                break;
            default:
                /*
                ** Invalid escape sequence - just copy it.  The character
                ** after the backslash is picked up by the next pass of
                ** the loop.
                */
                my_in[wr++] = '\\';
                break;
            }
        }
    }
    my_in[wr] = '\0';

    /*
    ** Ensure there is an even number of non-escaped quotes.
    */
    n_quotes = 0;
    for (i = 0; my_in[i] != '\0'; i++) {
        if (is_quote[i]) {
            n_quotes++;
        }
    }
    if ((n_quotes & 1) != 0) {
        goto fail;
    }

    /*
    ** Step 2: scan through "my_in", segment by segment.  The result
    ** cannot be longer than "my_in", so "my_out" cannot overflow.
    */
    out_len = 0;
    pos = 0;
    stop = 0;
    while (!stop && my_in[pos] != '\0') {
        if (is_quote[pos]) {            /* Quoted segment: copy verbatim */
            pos++;
            end = pos;
            while (my_in[end] != '\0' && !is_quote[end]) {
                end++;
            }
            seg_len = end - pos;
            memcpy(my_out + out_len, my_in + pos, seg_len);
            out_len += seg_len;
            pos = end;
            if (is_quote[pos]) {
                pos++;                  /* Step over the closing quote */
            }
        } else {                        /* Unquoted segment: edit a copy */
            end = pos;
            while (my_in[end] != '\0' && !is_quote[end]) {
                end++;
            }
            seg_len = end - pos;
            memcpy(my_tmp, my_in + pos, seg_len);
            my_tmp[seg_len] = '\0';
            pos = end;

            /*
            ** Collapse and compress first turn the white space chars
            ** HT, FF, VT and LF into spaces.  Collapse then drops every
            ** space; compress drops a space which follows a space.
            ** Collapse wins if both are requested.
            */
            if (do_collapse || do_compress) {
                wr = 0;
                for (rd = 0; my_tmp[rd] != '\0'; rd++) {
                    c = my_tmp[rd];
                    if (c == '\t' || c == '\f' || c == '\v' || c == '\n') {
                        c = ' ';
                    }
                    if (c == ' ' &&
                        (do_collapse || (wr > 0 && my_tmp[wr - 1] == ' '))) {
                        continue;
                    }
                    my_tmp[wr++] = (char) c;
                }
                my_tmp[wr] = '\0';
            }
            if (do_lowercase) {
                for (i = 0; my_tmp[i] != '\0'; i++) {
                    my_tmp[i] = (char) tolower((unsigned char) my_tmp[i]);
                }
            }
            if (do_upcase) {            /* Applied last, so it wins */
                for (i = 0; my_tmp[i] != '\0'; i++) {
                    my_tmp[i] = (char) toupper((unsigned char) my_tmp[i]);
                }
            }
            if (do_uncomment) {
                bang = strchr(my_tmp, '!');
                if (bang != NULL) {
                    *bang = '\0';       /* Truncate the string at the "!" */
                    stop = 1;           /* Stop processing loop too */
                }
            }
            seg_len = strlen(my_tmp);
            memcpy(my_out + out_len, my_tmp, seg_len);
            out_len += seg_len;
        }
    }

    /*
    ** Step 3: cut the result to fit "out", then trim what is left.
    */
    if (out_len > (size_t) out_size - 1) {
        out_len = (size_t) out_size - 1;
    }
    my_out[out_len] = '\0';

    first = 0;
    if (do_trim) {
        first = strspn(my_out, " \t");
        while (out_len > first &&
               (my_out[out_len - 1] == ' ' || my_out[out_len - 1] == '\t')) {
            out_len--;
        }
        my_out[out_len] = '\0';
    }
    memcpy(out, my_out + first, out_len - first + 1);
    *ln = (int) (out_len - first);

    free(my_out);
    free(my_tmp);
    free(is_quote);
    free(my_in);
    return out;

fail:
    free(my_out);
    free(my_tmp);
    free(is_quote);
    free(my_in);
    *out = '\0';
    *ln = 0;
    return NULL;
}

/*-------------------------------------------------- End of StrEdit.C -------*/