Files
sicspsi/hardsup/stredit.c
2009-02-13 09:01:24 +00:00

487 lines
14 KiB
C
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#define ident "1B03"
#ifdef VAXC
#module StrEdit ident
#endif
#ifdef __DECC
#pragma module StrEdit ident
#endif
/*
** +--------------------------------------------------------------+
** | Paul Scherrer Institute |
** | Department ASQ |
** | |
** | This software may be used freely by non-profit organizations.|
** | It may be copied provided that the name of P.S.I. and of the |
** | author is included. Neither P.S.I. nor the author assume any |
** | responsibility for the use of this software outside of P.S.I.|
** +--------------------------------------------------------------+
**
** Module Name . . . . . . . . : [...LIB.SINQ]StrEdit.C
**
** Author . . . . . . . . . . : D. Maden
** Date of creation . . . . . . : Jan 1996
**
** To compile this module, use:
$ import tasmad
$ define/group sinq_c_tlb mad_lib:sinq_c.tlb
$ cc /debug /noopt /obj=[]StrEdit -
tasmad_disk:[mad.lib.sinq]StrEdit +
sinq_c_tlb/lib
** To include this module in SINQ.OLB, use:
$ import tasmad
$ define/group sinq_c_tlb mad_lib:sinq_c.tlb
$
$ define/group sinq_olb mad_lib:sinq_dbg.olb
$ @tasmad_disk:[mad.lib.sinq]sinq_olb StrEdit debug
$
$ define/group sinq_olb mad_lib:sinq.olb
$ @tasmad_disk:[mad.lib.sinq]sinq_olb StrEdit
**
** Updates:
** 1A01 19-Jan-1996 DM. Initial version.
** 1B01 21-Mar-1996 DM. Move from DELTAT.OLB to SINQ.OLB.
**============================================================================
** The following entry points are included in this module:
**
**-------------------------------------------------------------------------
** #include <sinq_prototypes.h>
**
** char *StrEdit (char *out, char *in, char *ctrl, int *ln)
** -------
** Input Args:
** in - the string to be edited.
** ctrl - the string specifying what is to be done. See Description
** below.
** Output Args:
** out - the edited string. The maximum size of this string must
** be specified as input parameter *ln. The string
** will be zero terminated on return.
** Modified Args:
** *ln - an integer specifying, on input, the length of "out" in
** bytes. This must include room for the zero termination.
** On return, ln will be set to the number of characters
** copied to "out" (not counting the zero termination byte).
** Return value:
** If an error is detected, the return value is a NULL pointer. Otherwise
** it is a pointer to the resulting string (i.e. "out").
** Global variables:
** none
** Routines called:
** none
** Description:
** StrEdit (out, in, ctrl, ln) - This routine is intended to mimic the
** OpenVMS DCL lexical function F$EDIT.
**
** It first processes the string "in" to convert any C-style
** escape sequences introduced by a '\' character. Recognised
** escape sequences are:
** \a --> \007 BEL
** \b --> \010 BS (backspace)
** \f --> \014 FF (formfeed)
** \n --> \012 LF (linefeed)
** \r --> \015 CR (carriage return)
** \t --> \011 HT (horizontal tab)
** \v --> \013 VT (vertical tab)
** \\ --> \
** \' --> '
** \" --> "
** \? --> ?
** \xhh --> hh are an arbitrary number of hex digits.
** \nnn --> nnn are up to 3 octal digits.
** Any unrecognised escape sequence will be left unchanged.
**
** The resulting string is then edited according to the
** keywords specified in the control string "ctrl". The result
** will be written to string "out". The "out" argument may be
** the same as "in".
**
** On entry, "ln" specifies the size of "out" in bytes, including
** space for a null terminating byte. On return, it is set to the
** length of the result (not counting the zero-terminator).
**
** The following control strings are recognised:
**
** COLLAPSE - Removes all spaces and tabs from the string.
** COMPRESS - Replaces multiple spaces and tabs with a
** single space.
** LOWERCASE - Makes the string lower case.
** TRIM - Removes leading and trailing spaces and tabs
** from the string.
** UNCOMMENT - Removes comments from the string.
** UPCASE - Makes the string upper case.
**
** All keywords must be specified in full. They may be separated
** by white-space or commas and be in upper or lower case.
**
** If the input string contains non-escaped double quotes ("),
** the editing functions are not applied to the substrings
** enclosed by them. There must be an even number of such quotes,
** and the quotes themselves are not copied to the resulting
** string. Escaped double quotes (\"), on the other hand, are
** treated as normal characters.
**
** Return Status:
** StrEdit returns a pointer to "out". If any errors are detected (e.g. an
** odd number of quotes), string editing is abandoned and a null pointer
** is returned.
**
** Example:
** strcpy (in, " asdfg \"hello there\" folks ");
** len = sizeof (in);
** printf ("\"%s\"\n", StrEdit (in, in, "trim upcase compress", &len));
** will generate
** "ASDFG hello there FOLKS"
**-------------------------------------------------------------------------
** Global Definitions
*/
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <sinq_prototypes.h>
#define NIL '\0'
#define True 1
#define False 0
#define QUOTE ((char) (('\"' ^ 0xff) & 0xff))
/*
**====================================================================
*/
/*
**====================================================================
*/
/*--------------------------------------------------------------------------
** Global Variables
*/
/*
**---------------------------------------------------------------------------
**  StrEdit - edit a string.
**
**  Arguments:
**    out  - receives the edited, zero-terminated string. May be the
**           same buffer as "in": "in" is fully decoded into private
**           working space before "out" is written on the normal path.
**    in   - the string to be edited.
**    ctrl - keywords (collapse, compress, lowercase, trim, uncomment,
**           upcase) separated by commas and/or white space, in any
**           letter case.
**    ln   - on entry, the size of "out" in bytes; on return, the
**           length of the result (0 on any error).
**
**  Returns "out" on success. Returns NULL if an argument is NULL, if
**  *ln < 1, if "ctrl" holds an unknown keyword, if working space cannot
**  be allocated or if "in" holds an odd number of non-escaped quotes.
**
**  Note: each segment is appended to the result via StrJoin (declared
**        outside this file), which is handed the size of "out" and is
**        presumably what keeps the result within that size.
**        NOTE(review): confirm against StrJoin's definition.
*/

/* The editing operations selected by the control string. */
struct stredit_ops {
  int collapse;
  int compress;
  int lowercase;
  int trim;
  int uncomment;
  int upcase;
};

/* Return True if the tok_len characters at tok spell keyword (which must
** be given in lower case), ignoring letter case. */
static int stredit_keyword_eq(const char *tok, size_t tok_len,
                              const char *keyword)
{
  size_t k;

  if (strlen(keyword) != tok_len)
    return False;
  for (k = 0; k < tok_len; k++) {
    /* <ctype.h> functions need an unsigned char value, not a plain char */
    if (tolower((unsigned char) tok[k]) != keyword[k])
      return False;
  }
  return True;
}

/* Decode the control string into *ops. Returns False on an unknown keyword.
** The string is scanned in place with strspn/strcspn, so neither the
** caller's string nor any strtok() scan in progress is disturbed. */
static int stredit_parse_ctrl(const char *ctrl, struct stredit_ops *ops)
{
  static const char seps[] = ", \t\f\v\n";
  size_t n;

  memset(ops, 0, sizeof(*ops));
  ctrl += strspn(ctrl, seps);
  while (*ctrl != NIL) {
    n = strcspn(ctrl, seps);
    if (stredit_keyword_eq(ctrl, n, "collapse")) {
      ops->collapse = True;
    } else if (stredit_keyword_eq(ctrl, n, "compress")) {
      ops->compress = True;
    } else if (stredit_keyword_eq(ctrl, n, "lowercase")) {
      ops->lowercase = True;
    } else if (stredit_keyword_eq(ctrl, n, "trim")) {
      ops->trim = True;
    } else if (stredit_keyword_eq(ctrl, n, "uncomment")) {
      ops->uncomment = True;
    } else if (stredit_keyword_eq(ctrl, n, "upcase")) {
      ops->upcase = True;
    } else {
      return False;             /* Illegal ctrl verb */
    }
    ctrl += n;
    ctrl += strspn(ctrl, seps);
  }
  return True;
}

/* Numeric value of a hexadecimal digit (the caller guarantees c is one). */
static unsigned int stredit_hex_value(int c)
{
  switch (c) {
  case 'a': case 'A': return 10;
  case 'b': case 'B': return 11;
  case 'c': case 'C': return 12;
  case 'd': case 'D': return 13;
  case 'e': case 'E': return 14;
  case 'f': case 'F': return 15;
  default:
    return (unsigned int) (c - '0');
  }
}

/* Copy "in" to "dst", converting '\' escape sequences and replacing every
** non-escaped double quote by the QUOTE marker. Every branch writes no
** more characters than it consumes, so a "dst" of strlen(in) + 1 bytes
** cannot overflow. */
static void stredit_decode(const char *in, char *dst)
{
  size_t n, k;
  unsigned int value;

  while (*in != NIL) {
    if (*in == '\"') {          /* Non-escaped quote: mark it */
      *dst++ = QUOTE;
      in++;
      continue;
    }
    if (*in != '\\') {          /* Ordinary character */
      *dst++ = *in++;
      continue;
    }
    in++;                       /* Step over the backslash */
    switch (*in) {
    case 'a':
    case 'A':
      *dst++ = '\007';
      in++;
      break;
    case 'b':
    case 'B':
      *dst++ = '\010';
      in++;
      break;
    case 'f':
    case 'F':
      *dst++ = '\014';
      in++;
      break;
    case 'n':
    case 'N':
      *dst++ = '\012';
      in++;
      break;
    case 'r':
    case 'R':
      *dst++ = '\015';
      in++;
      break;
    case 't':
    case 'T':
      *dst++ = '\011';
      in++;
      break;
    case 'v':
    case 'V':
      *dst++ = '\013';
      in++;
      break;
    case '\\':
    case '\'':
    case '\"':
    case '\?':
      *dst++ = *in++;           /* The escaped character stands for itself */
      break;
    case 'x':
    case 'X':
      n = strspn(in + 1, "0123456789abcdefABCDEF");
      if (n == 0) {
        /* Not a valid escape. Emit the backslash only; "in" still points
        ** at the 'x', so the next pass copies it and "\x" is unchanged. */
        *dst++ = '\\';
        break;
      }
      /* Consume exactly the hex digits found; only the low-order bits
      ** that fit in a char survive the final conversion. */
      value = 0;
      for (k = 1; k <= n; k++)
        value = (value << 4) | stredit_hex_value(in[k]);
      *dst++ = (char) value;
      in += n + 1;
      break;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      n = strspn(in, "01234567");
      if (n > 3)
        n = 3;                  /* At most 3 octal digits are used */
      value = 0;
      for (k = 0; k < n; k++)
        value = (value << 3) | (unsigned int) (in[k] - '0');
      *dst++ = (char) value;
      in += n;
      break;
    default:
      /* Invalid esc sequ (or '\' at end of string) - just copy the '\';
      ** the following character, if any, is copied on the next pass. */
      *dst++ = '\\';
    }
  }
  *dst = NIL;
}

/* Apply the collapse/compress, case and uncomment edits to "seg" in place.
** Returns True if a '!' comment was found (and removed), in which case
** the rest of the input must be ignored. */
static int stredit_edit_segment(char *seg, const struct stredit_ops *ops)
{
  char *rd, *wr, *bang;
  int in_run;

  if (ops->collapse || ops->compress) {
    /*
    ** Tab, FF, VT and LF all count as a space. Collapse drops every
    ** space; compress keeps the first space of each run.
    */
    wr = seg;
    in_run = False;
    for (rd = seg; *rd != NIL; rd++) {
      if (*rd == ' ' || *rd == '\t' || *rd == '\f' ||
          *rd == '\v' || *rd == '\n') {
        if (!ops->collapse && !in_run)
          *wr++ = ' ';
        in_run = True;
      } else {
        *wr++ = *rd;
        in_run = False;
      }
    }
    *wr = NIL;
  }
  if (ops->lowercase) {
    for (rd = seg; *rd != NIL; rd++)
      *rd = (char) tolower((unsigned char) *rd);
  }
  if (ops->upcase) {            /* Applied last, so it wins over lowercase */
    for (rd = seg; *rd != NIL; rd++)
      *rd = (char) toupper((unsigned char) *rd);
  }
  if (ops->uncomment) {
    bang = strchr(seg, '!');
    if (bang != NULL) {
      *bang = NIL;              /* Truncate the string at the "!" */
      return True;
    }
  }
  return False;
}

char *StrEdit(char *out, char *in, char *ctrl, int *ln)
{
  struct stredit_ops ops;
  char *my_in, *my_out, *my_tmp, *result, *nxt;
  size_t len, pos, n, lead, end;
  int out_size, odd_quotes, stop;

  if (ln == NULL)
    return NULL;
  out_size = *ln;
  *ln = 0;                      /* Stays 0 on every error path */
  if (out == NULL || in == NULL || ctrl == NULL || out_size < 1)
    return NULL;                /* Can't do anything!! */

  if (*in == NIL) {
    *out = NIL;
    return out;                 /* Nothing to do!! */
  }

  if (!stredit_parse_ctrl(ctrl, &ops)) {
    *out = NIL;
    return NULL;
  }

  /*
  ** Get three working buffers, each big enough for "in": the decoded
  ** input, the accumulated result and the segment being edited.
  */
  result = NULL;
  len = strlen(in) + 1;
  my_in = malloc(len);
  my_out = malloc(len);
  my_tmp = malloc(len);
  if (my_in == NULL || my_out == NULL || my_tmp == NULL) {
    *out = NIL;
    goto done;
  }

  /* Decode before "out" is written, since "out" may be the same as "in" */
  stredit_decode(in, my_in);
  *my_out = NIL;
  *my_tmp = NIL;
  *out = NIL;

  /*
  ** Ensure "in" has an even number of non-escaped quotes. Return if not.
  */
  odd_quotes = False;
  for (pos = 0; my_in[pos] != NIL; pos++) {
    if (my_in[pos] == QUOTE)
      odd_quotes = !odd_quotes;
  }
  if (odd_quotes)
    goto done;

  /*
  ** Scan through the decoded input, substring by substring, to
  ** handle quotation marks correctly.
  */
  pos = 0;
  stop = False;
  while (!stop && my_in[pos] != NIL) {
    if (my_in[pos] == QUOTE) {
      /* Quoted string: copy it verbatim, dropping both quote marks.
      ** The matching quote exists because the quote count is even. */
      nxt = strchr(&my_in[pos + 1], QUOTE);
      n = (size_t) (nxt - &my_in[pos + 1]);
      memcpy(my_tmp, &my_in[pos + 1], n);
      my_tmp[n] = NIL;
      pos += n + 2;
    } else {
      /* Unquoted text up to the next quote (or the end): edit a copy */
      nxt = strchr(&my_in[pos], QUOTE);
      if (nxt != NULL) {
        n = (size_t) (nxt - &my_in[pos]);
      } else {
        n = strlen(&my_in[pos]);
      }
      memcpy(my_tmp, &my_in[pos], n);
      my_tmp[n] = NIL;
      pos += n;
      stop = stredit_edit_segment(my_tmp, &ops);
    }
    StrJoin(out, out_size, my_out, my_tmp);   /* out = my_out + my_tmp */
    strcpy(my_out, out);
  }

  if (ops.trim) {
    /* Strip leading and trailing spaces and tabs from the whole result */
    lead = strspn(my_out, " \t");
    end = strlen(my_out);
    while (end > lead &&
           (my_out[end - 1] == ' ' || my_out[end - 1] == '\t'))
      end--;
    my_out[end] = NIL;
    strcpy(out, &my_out[lead]);
  }

  *ln = (int) strlen(out);
  result = out;

done:
  free(my_tmp);
  free(my_out);
  free(my_in);
  return result;
}
/*-------------------------------------------------- End of StrEdit.C -------*/