sicspsi/hardsup/stredit.c

#define	    ident	"1B03"
#ifdef VAXC
#module	    StrEdit	ident
#endif
#ifdef __DECC
#pragma	    module	    StrEdit	ident
#endif
/*
** +--------------------------------------------------------------+
** |                  Paul Scherrer Institute                     |
** |                     Department ASQ				  |
** |                                                              |
** | This software may be used freely by non-profit organizations.|
** | It may be copied provided that the name of P.S.I. and of the |
** | author is included. Neither P.S.I. nor the author assume any |
** | responsibility for the use of this software outside of P.S.I.|
** +--------------------------------------------------------------+
**
** Module Name  . . . . . . . . : [...LIB.SINQ]StrEdit.C
**
** Author   . . . . . . . . . . : D. Maden
** Date of creation . . . . . . : Jan 1996
**
**  To compile this module, use:

	$ import tasmad
	$ define/group sinq_c_tlb mad_lib:sinq_c.tlb
	$ cc /debug /noopt /obj=[]StrEdit -
			tasmad_disk:[mad.lib.sinq]StrEdit +
			sinq_c_tlb/lib

**  To include this module in SINQ.OLB, use:

  $ import tasmad
  $ define/group sinq_c_tlb mad_lib:sinq_c.tlb
  $
  $ define/group sinq_olb mad_lib:sinq_dbg.olb
  $ @tasmad_disk:[mad.lib.sinq]sinq_olb StrEdit debug
  $
  $ define/group sinq_olb mad_lib:sinq.olb
  $ @tasmad_disk:[mad.lib.sinq]sinq_olb StrEdit
**
** Updates:
**  1A01 19-Jan-1996 DM.	Initial version.
**  1B01 21-Mar-1996 DM.	Move from DELTAT.OLB to SINQ.OLB.
**============================================================================
** The following entry points are included in this module:
**
**-------------------------------------------------------------------------
**    #include <sinq_prototypes.h>
**
**    char *StrEdit (char *out, char *in, char *ctrl, int *ln)
**	    -------
**  Input Args:
**	in	    - the string to be edited.
**	ctrl	    - the string specifying what is to be done. See Description
**			below.
**  Output Args:
**	out	    - the edited string. The maximum size of this string must
**			be specified as input parameter *ln. The string
**			will be zero terminated on return.
**  Modified Args:
**	*ln	    - an integer specifying, on input, the length of "out" in
**		      bytes. This must include room for the zero termination.
**		      On return, ln will be set to the number of characters
**		      copied to "out" (not counting the zero termination byte).
**  Return value:
**	If an error is detected, the return value is a NULL pointer. Otherwise
**	it is a pointer to the resulting string (i.e. "out").
**  Global variables:
**	none
**  Routines called:
**	none
**  Description:
**    StrEdit (out, in, ctrl, ln)  - This routine is intended to mimic the
**		OpenVMS DCL lexical function F$EDIT.
**
**		It first processes the string "in" to convert any C-style
**		escape sequences introduced by a '\' character. Recognised
**		escape sequences are:
**		    \a    --> \007 BEL
**		    \b    --> \010 BS  (backspace)
**		    \f    --> \014 FF  (formfeed)
**		    \n    --> \012 LF  (linefeed)
**		    \r    --> \015 CR  (carriage return)
**		    \t    --> \011 HT  (horizontal tab)
**		    \v    --> \013 VT  (vertical tab)
**		    \\    --> \
**		    \'    --> '
**		    \"    --> "
**		    \?    --> ?
**		    \xhh  --> hh are an arbitrary number of hex digits.
**		    \nnn  --> nnn are up to 3 octal digits.
**		Any unrecognised escape sequence will be left unchanged.
**
**		The resulting string is then edited according to the
**		keywords specified in the control string "ctrl". The result
**		will be written to string "out". The "out" argument may be
**		the same as "in".
**
**		On entry, "ln" specifies the size of "out" in bytes, including
**		space for a null terminating byte. On return, it is set to the
**		length of the result (not counting the zero-terminator).
**
**		The following control strings are recognised:
**
**		    COLLAPSE	- Removes all spaces and tabs from the string.
**		    COMPRESS	- Replaces multiple spaces and tabs with a
**				    single space.
**		    LOWERCASE	- Makes the string lower case.
**		    TRIM	- Removes leading and trailing spaces and tabs
**				    from the string.
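**		    UNCOMMENT	- Removes comments from the string. A comment
**				    starts at the first '!' that is not inside
**				    double quotes and runs to the end of the
**				    string.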
**		    UPCASE	- Makes the string upper case.
**
**		All keywords must be specified in full. They may be separated
**		by white-space or commas and be in upper or lower case.
**
**		If the input string contains non-escaped double quotes ("),
**		then the editing functions are not applied to substrings within
**		these quotes ("). There must be an even number of such quotes,
**		and the quotes themselves are not copied to the resulting
**		string. On the other hand, escaped double quotes (\") are
**		treated as normal characters.
**
**  Return Status:
**   StrEdit returns a pointer to "out". If any errors are detected (e.g. an
**   odd number of quotes), string editing is abandoned and a null pointer
**   is returned.
**
**  Example:
**	    strcpy (in, "   asdfg    \"hello   there\"  folks   ");
**	    len = sizeof (in);
**	    printf ("\"%s\"\n", StrEdit (in, in, "trim upcase compress", &len));
**   will generate
**          "ASDFG hello   there FOLKS"
**-------------------------------------------------------------------------
**			Global Definitions
*/
#include    <stdlib.h>
#include    <stdio.h>
#include    <ctype.h>
#include    <string.h>
#include    <sinq_prototypes.h>

#define NIL	'\0'
#define	True	1
#define	False	0
/*
**====================================================================
*/
/*--------------------------------------------------------------------------
**			Local Types
*/
/*
** The set of editing operations selected by the control string.
** Each member is True if the corresponding keyword was given.
*/
typedef struct {
  int collapse;
  int compress;
  int lowercase;
  int trim;
  int uncomment;
  int upcase;
} StrEditOps;
/*
**---------------------------------------------------------------------------
**		keyword_matches - case-insensitive compare of the token
**			    tok[0..tok_len-1] (not zero terminated) with the
**			    lower-case, zero terminated string "keyword".
**			    Returns True on an exact match, else False.
*/
static int keyword_matches(const char *tok, size_t tok_len,
                           const char *keyword)
{
  size_t i;

  if (strlen(keyword) != tok_len)
    return False;
  for (i = 0; i < tok_len; i++) {
    /* <ctype.h> functions need an unsigned char value, not a plain char */
    if (tolower((unsigned char) tok[i]) != keyword[i])
      return False;
  }
  return True;
}

/*
**---------------------------------------------------------------------------
**		parse_ctrl - work out which operations "ctrl" asks for.
**			    Keywords are separated by commas and/or white
**			    space and may be in either case. Returns True if
**			    every token was a known keyword, False otherwise.
**
**			    Note: the scan uses strspn/strcspn rather than
**				  strtok so that "ctrl" is not modified and
**				  so that a strtok sequence in progress in
**				  the caller is not disturbed.
*/
static int parse_ctrl(const char *ctrl, StrEditOps *ops)
{
  static const char separators[] = ", \t\f\v\n";
  size_t tok_len;

  memset(ops, 0, sizeof(*ops));
  for (;;) {
    ctrl += strspn(ctrl, separators);   /* Skip leading separators */
    if (*ctrl == NIL)
      return True;
    tok_len = strcspn(ctrl, separators);
    if (keyword_matches(ctrl, tok_len, "collapse")) {
      ops->collapse = True;
    } else if (keyword_matches(ctrl, tok_len, "compress")) {
      ops->compress = True;
    } else if (keyword_matches(ctrl, tok_len, "lowercase")) {
      ops->lowercase = True;
    } else if (keyword_matches(ctrl, tok_len, "trim")) {
      ops->trim = True;
    } else if (keyword_matches(ctrl, tok_len, "uncomment")) {
      ops->uncomment = True;
    } else if (keyword_matches(ctrl, tok_len, "upcase")) {
      ops->upcase = True;
    } else {
      return False;             /* Illegal ctrl verb */
    }
    ctrl += tok_len;
  }
}

/*
**---------------------------------------------------------------------------
**		decode_escapes - copy "in" to "text", converting any '\'
**			    escape sequences as we go.
**
**			    "text" and "mark" must each have room for
**			    strlen(in) + 1 bytes and "mark" must be zero
**			    filled on entry. Decoding never lengthens the
**			    string so no overflow check is needed.
**
**			    Every non-escaped double quote is stored as '"'
**			    in "text" and flagged by a non-zero byte at the
**			    same index in "mark". Using a separate marker
**			    array (rather than a reserved character value)
**			    means that no byte value of the input can be
**			    mistaken for a quote.
**
**			    Note that an escape such as \0 stores a zero
**			    byte, which ends the decoded string early.
*/
static void decode_escapes(const char *in, char *text, char *mark)
{
  size_t n = 0;                 /* Next free index in text[] and mark[] */
  unsigned int octal;
  long hex;
  int digits;
  char *end;

  while (*in != NIL) {
    if (*in == '\"') {          /* Non-escaped double quote */
      text[n] = '\"';
      mark[n] = True;
      n++;
      in++;
      continue;
    }
    if (*in != '\\') {          /* Ordinary character */
      text[n++] = *in++;
      continue;
    }
    in++;                       /* Skip the '\' and look at what follows */
    switch (*in) {
    case 'a':
    case 'A':
      text[n++] = '\007';
      in++;
      break;
    case 'b':
    case 'B':
      text[n++] = '\010';
      in++;
      break;
    case 'f':
    case 'F':
      text[n++] = '\014';
      in++;
      break;
    case 'n':
    case 'N':
      text[n++] = '\012';
      in++;
      break;
    case 'r':
    case 'R':
      text[n++] = '\015';
      in++;
      break;
    case 't':
    case 'T':
      text[n++] = '\011';
      in++;
      break;
    case 'v':
    case 'V':
      text[n++] = '\013';
      in++;
      break;
    case '\\':
    case '\'':
    case '\"':
    case '\?':
      text[n++] = *in++;        /* The escaped character stands for itself */
      break;
    case 'x':
    case 'X':
      if (isxdigit((unsigned char) in[1])) {
        hex = strtol(in + 1, &end, 16);
        text[n++] = (char) (unsigned char) hex; /* Keep the low byte only */
        in = end;
      } else {
        text[n++] = '\\';       /* No hex digits - leave "\x" unchanged */
        text[n++] = *in++;
      }
      break;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      octal = 0;                /* Up to 3 octal digits */
      for (digits = 0; digits < 3 && *in >= '0' && *in <= '7'; digits++)
        octal = octal * 8 + (unsigned int) (*in++ - '0');
      text[n++] = (char) (unsigned char) octal; /* Keep the low byte only */
      break;
    default:
      /*
       ** Invalid esc sequ - just copy the '\'. The character after it
       ** (if any) is handled by the next pass round the loop.
       */
      text[n++] = '\\';
    }
  }
  text[n] = NIL;
}

/*
**---------------------------------------------------------------------------
**		edit_segment - apply the per-segment operations, in place,
**			    to the zero terminated, non-quoted substring
**			    "seg". The order is collapse/compress, lowercase,
**			    upcase, uncomment. The result is never longer
**			    than the input.
**
**			    Returns True if a comment was removed, i.e. the
**			    caller must ignore the remainder of the string.
*/
static int edit_segment(char *seg, const StrEditOps *ops)
{
  char *src, *dst, *bang;
  char c;

  if (ops->collapse || ops->compress) {
    /*
     ** Tab, formfeed, vertical tab and linefeed all count as a space.
     ** Collapse drops every space; compress (used only if collapse is
     ** not set) drops a space that directly follows another one.
     */
    dst = seg;
    for (src = seg; *src != NIL; src++) {
      c = *src;
      if (c == '\t' || c == '\f' || c == '\v' || c == '\n')
        c = ' ';
      if (c == ' ' && (ops->collapse || (dst > seg && dst[-1] == ' ')))
        continue;
      *dst++ = c;
    }
    *dst = NIL;
  }
  /*
   ** tolower/toupper are used rather than _tolower/_toupper since the
   ** latter are only defined for arguments which are already upper/lower
   ** case letters.
   */
  if (ops->lowercase) {
    for (src = seg; *src != NIL; src++)
      *src = (char) tolower((unsigned char) *src);
  }
  if (ops->upcase) {
    for (src = seg; *src != NIL; src++)
      *src = (char) toupper((unsigned char) *src);
  }
  if (ops->uncomment) {
    bang = strchr(seg, '!');
    if (bang != NULL) {
      *bang = NIL;              /* Truncate the string at the "!" */
      return True;
    }
  }
  return False;
}

/*
**---------------------------------------------------------------------------
**		StrEdit	- edit a string.
**
**  Args:
**	out	- receives the zero terminated result. May be the same
**		    buffer as "in".
**	in	- the string to be edited.
**	ctrl	- the keywords (collapse, compress, lowercase, trim,
**		    uncomment, upcase) separated by commas and/or white space.
**	*ln	- on entry, the size of "out" in bytes including room for the
**		    zero terminator. On return, the length of the result
**		    (0 if an error is detected).
**  Return value:
**	"out" on success. NULL if "out" or "ln" is NULL, if *ln < 1, if "in"
**	or "ctrl" is NULL, if "ctrl" holds an unknown keyword, if working
**	space cannot be allocated or if "in" holds an odd number of
**	non-escaped double quotes. Whenever NULL is returned and "out" is
**	usable (non-NULL with *ln >= 1), "out" is set to the empty string.
**  Notes:
**	- An empty "in" gives an empty result without "ctrl" being checked.
**	- A result which does not fit is truncated to *ln - 1 characters.
**	- Text inside non-escaped double quotes is copied without its quotes
**	  and is not touched by collapse, compress, lowercase, upcase or
**	  uncomment. Trim is applied last, to the assembled (and possibly
**	  truncated) result, and removes leading and trailing spaces and tabs
**	  there, whether or not they came from a quoted substring.
*/
char *StrEdit(
/*	 =======
*/ char *out,
               char *in, char *ctrl, int *ln)
{
  StrEditOps ops;
  char *text, *mark, *result;
  size_t len, text_len, res_len, pos, end, n_quotes, lead;
  int out_size;

  if (out == NULL || ln == NULL)
    return NULL;
  out_size = *ln;
  *ln = 0;                      /* Length reported on every early exit */
  if (out_size < 1)
    return NULL;                /* Can't do anything!! */
  if (in == NULL || ctrl == NULL) {
    *out = NIL;
    return NULL;
  }
  if (*in == NIL) {
    *out = NIL;
    return out;                 /* Nothing to do!! */
  }
  if (!parse_ctrl(ctrl, &ops)) {
    *out = NIL;
    return NULL;                /* Illegal ctrl verb */
  }
  /*
   ** Get the working space in one piece: the decoded input, the quote
   ** markers (which calloc zero fills) and the assembled result. None
   ** of them can need more than strlen(in) + 1 bytes.
   */
  len = strlen(in) + 1;
  text = calloc(3, len);
  if (text == NULL) {
    *out = NIL;
    return NULL;
  }
  mark = text + len;
  result = mark + len;

  decode_escapes(in, text, mark);
  *out = NIL;                   /* "in" is finished with (it may be "out") */
  text_len = strlen(text);
  /*
   ** Ensure "in" has an even number of non-escaped quotes. Return if not.
   */
  n_quotes = 0;
  for (pos = 0; pos < text_len; pos++) {
    if (mark[pos])
      n_quotes++;
  }
  if ((n_quotes & 1) == 1) {
    free(text);
    return NULL;
  }
  /*
   ** Scan through the decoded text, substring by substring, to
   ** handle quotation marks correctly.
   */
  res_len = 0;
  pos = 0;
  while (pos < text_len) {
    if (mark[pos]) {            /* Is there a quoted string next? */
      /* Yes. The matching quote exists since the quote count is even. */
      for (end = pos + 1; end < text_len && !mark[end]; end++);
      memcpy(result + res_len, text + pos + 1, end - pos - 1);
      res_len += end - pos - 1; /* Copied verbatim, without the quotes */
      pos = end + 1;
    } else {
      /* Not a quoted string; it runs up to the next non-escaped quote. */
      for (end = pos; end < text_len && !mark[end]; end++);
      memcpy(result + res_len, text + pos, end - pos);
      result[res_len + (end - pos)] = NIL;
      pos = end;
      if (edit_segment(result + res_len, &ops)) {
        res_len += strlen(result + res_len);
        break;                  /* Comment found - ignore the rest */
      }
      res_len += strlen(result + res_len);
    }
  }
  result[res_len] = NIL;
  /*
   ** Make the result fit into "out", leaving room for the terminator.
   */
  if (res_len > (size_t) (out_size - 1)) {
    res_len = (size_t) (out_size - 1);
    result[res_len] = NIL;
  }

  lead = 0;
  if (ops.trim) {
    lead = strspn(result, " \t");
    while (res_len > lead &&
           (result[res_len - 1] == ' ' || result[res_len - 1] == '\t'))
      res_len--;
    result[res_len] = NIL;
  }
  memcpy(out, result + lead, res_len - lead + 1);       /* Includes the NIL */
  *ln = (int) (res_len - lead);
  free(text);

  return out;
}

/*-------------------------------------------------- End of StrEdit.C -------*/