#define ident "1B03"
#ifdef VAXC
#module StrEdit ident
#endif
#ifdef __DECC
#pragma module StrEdit ident
#endif
/*
** +--------------------------------------------------------------+
** |                 Paul Scherrer Institute                      |
** |                     Department ASQ                           |
** |                                                              |
** | This software may be used freely by non-profit organizations.|
** | It may be copied provided that the name of P.S.I. and of the |
** | author is included. Neither P.S.I. nor the author assume any |
** | responsibility for the use of this software outside of P.S.I.|
** +--------------------------------------------------------------+
**
** Module Name  . . . . . . . . : [...LIB.SINQ]StrEdit.C
**
** Author   . . . . . . . . . . : D. Maden
** Date of creation . . . . . . : Jan 1996
**
** To compile this module, use:

      $ import tasmad
      $ define/group sinq_c_tlb mad_lib:sinq_c.tlb
      $ cc /debug /noopt /obj=[]StrEdit -
               tasmad_disk:[mad.lib.sinq]StrEdit +
               sinq_c_tlb/lib

** To include this module in SINQ.OLB, use:

      $ import tasmad
      $ define/group sinq_c_tlb mad_lib:sinq_c.tlb
      $
      $ define/group sinq_olb mad_lib:sinq_dbg.olb
      $ @tasmad_disk:[mad.lib.sinq]sinq_olb StrEdit debug
      $
      $ define/group sinq_olb mad_lib:sinq.olb
      $ @tasmad_disk:[mad.lib.sinq]sinq_olb StrEdit
**
** Updates:
**  1A01 19-Jan-1996 DM. Initial version.
**  1B01 21-Mar-1996 DM. Move from DELTAT.OLB to SINQ.OLB.
**============================================================================
** The following entry points are included in this module:
**
**-------------------------------------------------------------------------
**  #include <...>      (project header declaring StrEdit; the header
**                       name is not shown in this copy of the file)
**
**  char *StrEdit (char *out, char *in, char *ctrl, int *ln)
**        -------
**  Input Args:
**    in     - the string to be edited.
**    ctrl   - the string specifying what is to be done. See Description
**             below.
**  Output Args:
**    out    - the edited string. The maximum size of this string must
**             be specified as input parameter *ln. The string
**             will be zero terminated on return.
**  Modified Args:
**    *ln    - an integer specifying, on input, the length of "out" in
**             bytes. This must include room for the zero termination.
**             On return, ln will be set to the number of characters
**             copied to "out" (not counting the zero termination byte).
**  Return value:
**    If an error is detected, the return value is a NULL pointer. Otherwise
**    it is a pointer to the resulting string (i.e. "out").
**  Global variables:
**    none
**  Routines called:
**    none
**  Description:
**    StrEdit (out, in, ctrl, ln) - This routine is intended to mimic the
**              OpenVMS DCL lexical function F$EDIT.
**
**              It first processes the string "in" to convert any C-style
**              escape sequences introduced by a '\' character. Recognised
**              escape sequences are:
**                  \a     -->  \007 BEL
**                  \b     -->  \010 BS  (backspace)
**                  \f     -->  \014 FF  (formfeed)
**                  \n     -->  \012 LF  (linefeed)
**                  \r     -->  \015 CR  (carriage return)
**                  \t     -->  \011 HT  (horizontal tab)
**                  \v     -->  \013 VT  (vertical tab)
**                  \\     -->  \   (a single backslash)
**                  \'     -->  '
**                  \"     -->  "
**                  \?     -->  ?
**                  \xhh   -->  hh are an arbitrary number of hex digits.
**                              Only the low 8 bits of the value are kept.
**                  \nnn   -->  nnn are up to 3 octal digits.
**              Any unrecognised escape sequence will be left unchanged.
**              An escape sequence whose value is zero ends the string at
**              that point.
**
**              The resulting string is then edited according to the
**              keywords specified in the control string "ctrl". The result
**              will be written to string "out". The "out" argument may be
**              the same as "in".
**
**              On entry, "ln" specifies the size of "out" in bytes, including
**              space for a null terminating byte. On return, it is set to the
**              length of the result (not counting the zero-terminator).
**              A result which does not fit is truncated to (size - 1)
**              characters.
**
**              The following control strings are recognised:
**
**                  COLLAPSE  - Removes all spaces and tabs from the string.
**                  COMPRESS  - Replaces multiple spaces and tabs with a
**                              single space.
**                  LOWERCASE - Makes the string lower case.
**                  TRIM      - Removes leading and trailing spaces and tabs
**                              from the string.
**                  UNCOMMENT - Removes comments from the string.
**                  UPCASE    - Makes the string upper case.
**
**              All keywords must be specified in full. They may be separated
**              by white-space or commas and be in upper or lower case.
**
**              If the input string contains non-escaped double quotes ("),
**              then the editing functions are not applied to substrings within
**              these quotes ("), there must be an even number of such quotes
**              and the quotes are not copied to the resulting string. On the
**              other hand, escaped double quotes (\") are treated as normal
**              characters.
**
**  Return Status:
**    StrEdit returns a pointer to "out". If any errors are detected (e.g. an
**    odd number of quotes), string editing is abandoned and a null pointer
**    is returned.
**
**  Example:
**      strcpy (in, " asdfg \"hello there\" folks ");
**      len = sizeof (in);
**      printf ("\"%s\"\n", StrEdit (in, in, "trim upcase compress", &len));
**    will generate
**      "ASDFG hello there FOLKS"
**-------------------------------------------------------------------------
**                              Global Definitions
*/
/*
** NOTE(review): this file originally carried five #include directives whose
** header names are not visible in this copy. The four standard headers below
** cover every library routine used here; re-add the project header (the one
** that declares StrEdit's prototype) if the build requires it.
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NIL '\0'
#define True 1
#define False 0
/*
**====================================================================
*/
/*
** Editing operations selected by the control string. Each member is
** True or False.
*/
struct stredit_ops {
    int collapse;
    int compress;
    int lowercase;
    int trim;
    int uncomment;
    int upcase;
};
/*
**---------------------------------------------------------------------------
** stredit_parse_ctrl - decode the control string into a set of flags.
**
**   Keywords are matched case-insensitively and may be separated by commas,
**   spaces, tabs, form-feeds, vertical tabs or line-feeds. The control
**   string is only read, never modified, and no library tokenizer state is
**   touched.
**
**   Returns 0 on success, -1 if an unknown keyword is found.
*/
static int stredit_parse_ctrl(const char *ctrl, struct stredit_ops *ops)
{
    static const char seps[] = ", \t\f\v\n";
    char word[16];              /* Longest keyword is 9 characters */

    memset(ops, 0, sizeof *ops);

    for (;;) {
        size_t n, i;

        ctrl += strspn(ctrl, seps);     /* Skip separators */
        n = strcspn(ctrl, seps);        /* Length of next keyword */
        if (n == 0)
            return 0;                   /* End of control string */
        if (n >= sizeof word)
            return -1;                  /* Too long to be any keyword */

        /* Cast via unsigned char: tolower() is undefined for negative
        ** values other than EOF. */
        for (i = 0; i < n; i++)
            word[i] = (char) tolower((unsigned char) ctrl[i]);
        word[n] = NIL;
        ctrl += n;

        if (strcmp(word, "collapse") == 0) {
            ops->collapse = True;
        } else if (strcmp(word, "compress") == 0) {
            ops->compress = True;
        } else if (strcmp(word, "lowercase") == 0) {
            ops->lowercase = True;
        } else if (strcmp(word, "trim") == 0) {
            ops->trim = True;
        } else if (strcmp(word, "uncomment") == 0) {
            ops->uncomment = True;
        } else if (strcmp(word, "upcase") == 0) {
            ops->upcase = True;
        } else {
            return -1;                  /* Illegal ctrl verb */
        }
    }
}
/*
**---------------------------------------------------------------------------
** stredit_hex_value - value of a hexadecimal digit, or -1 if "c" is not one.
*/
static int stredit_hex_value(int c)
{
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
}
/*
**---------------------------------------------------------------------------
** stredit_unescape - copy "src" to "dst", converting '\' escape sequences
**                    and splitting the text at every non-escaped quote.
**
**   Each non-escaped double quote is replaced by a NIL byte, so that on
**   return "dst" holds (number of quotes + 1) consecutive zero-terminated
**   segments. Segments at even positions (0, 2, ..) were outside quotes,
**   those at odd positions were inside quotes. Because the separators are
**   out of band, every non-zero byte value may appear in the text itself.
**
**   An escape sequence which evaluates to zero ends the string there.
**
**   "dst" must have room for strlen(src) + 1 bytes; the conversion never
**   produces more bytes than it consumes.
**
**   Returns the number of non-escaped quotes found.
*/
static size_t stredit_unescape(const char *src, char *dst)
{
    size_t nquotes = 0;

    while (*src != NIL) {
        unsigned int value;
        int ndigits;
        char c;

        if (*src == '\"') {             /* Non-escaped quote: split here */
            *dst++ = NIL;
            nquotes++;
            src++;
            continue;
        }
        if (*src != '\\') {             /* Ordinary character */
            *dst++ = *src++;
            continue;
        }

        src++;                          /* Step over the backslash */
        switch (*src) {
        case 'a': case 'A': c = '\007'; src++; break;
        case 'b': case 'B': c = '\010'; src++; break;
        case 'f': case 'F': c = '\014'; src++; break;
        case 'n': case 'N': c = '\012'; src++; break;
        case 'r': case 'R': c = '\015'; src++; break;
        case 't': case 'T': c = '\011'; src++; break;
        case 'v': case 'V': c = '\013'; src++; break;
        case '\\': c = '\\'; src++; break;
        case '\'': c = '\''; src++; break;
        case '\"': c = '\"'; src++; break;
        case '\?': c = '\?'; src++; break;
        case 'x': case 'X':
            if (stredit_hex_value(src[1]) < 0) {
                /* No hex digits follow: not a valid escape. Emit the
                ** backslash and leave the 'x' to be copied as ordinary
                ** text on the next pass, so the sequence is unchanged. */
                c = '\\';
                break;
            }
            value = 0;
            for (src++; stredit_hex_value(*src) >= 0; src++)
                value = ((value << 4) |
                         (unsigned int) stredit_hex_value(*src)) & 0xffu;
            c = (char) value;
            break;
        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
            value = 0;
            for (ndigits = 0;
                 ndigits < 3 && *src >= '0' && *src <= '7';
                 ndigits++, src++)
                value = value * 8 + (unsigned int) (*src - '0');
            c = (char) (value & 0xffu);
            break;
        default:
            /* Invalid esc sequ - just copy it. The character after the
            ** backslash (if any) is copied on the next pass. */
            c = '\\';
            break;
        }

        if (c == NIL)
            break;                      /* Escaped zero byte ends the text */
        *dst++ = c;
    }

    *dst = NIL;
    return nquotes;
}
/*
**---------------------------------------------------------------------------
** stredit_edit_segment - apply the in-place edits to one unquoted segment.
**
**   Handles COLLAPSE/COMPRESS, LOWERCASE, UPCASE and UNCOMMENT, in that
**   order. The segment never grows. TRIM is not handled here because it
**   applies to the complete result.
**
**   Returns True if a comment marker ('!') was found, in which case the
**   segment has been cut there and the caller must ignore all remaining
**   input. Returns False otherwise.
*/
static int stredit_edit_segment(char *seg, const struct stredit_ops *ops)
{
    char *p;

    if (ops->collapse || ops->compress) {
        char *wr = seg;
        int after_space = False;

        for (p = seg; *p != NIL; p++) {
            char c = *p;

            /* Treat tab, form-feed, vertical tab and line-feed as space */
            if (c == '\t' || c == '\f' || c == '\v' || c == '\n')
                c = ' ';

            if (c == ' ') {
                /* COLLAPSE drops every space; COMPRESS keeps only the
                ** first of a run. COLLAPSE wins if both are given. */
                if (ops->collapse || after_space)
                    continue;
                after_space = True;
            } else {
                after_space = False;
            }
            *wr++ = c;
        }
        *wr = NIL;
    }

    if (ops->lowercase) {
        for (p = seg; *p != NIL; p++)
            *p = (char) tolower((unsigned char) *p);
    }

    if (ops->upcase) {
        for (p = seg; *p != NIL; p++)
            *p = (char) toupper((unsigned char) *p);
    }

    if (ops->uncomment) {
        p = strchr(seg, '!');
        if (p != NULL) {
            *p = NIL;                   /* Truncate the string at the "!" */
            return True;                /* Stop processing loop too */
        }
    }

    return False;
}
/*
**---------------------------------------------------------------------------
** stredit_append - append "seg" to "out", never exceeding the buffer.
**
**   "out" currently holds "out_len" characters and has a total capacity
**   of "out_size" bytes (out_len < out_size). Text which does not fit is
**   dropped. "out" is always zero terminated on return.
**
**   Returns the new length of "out".
*/
static size_t stredit_append(char *out, size_t out_len, size_t out_size,
                             const char *seg)
{
    size_t room = out_size - 1 - out_len;
    size_t n = strlen(seg);

    if (n > room)
        n = room;
    memcpy(out + out_len, seg, n);
    out[out_len + n] = NIL;
    return out_len + n;
}
/*
**---------------------------------------------------------------------------
**                            StrEdit - edit a string.
**                Note: the result is assembled with length-checked
**                      appends so that it is always null terminated
**                      and never overruns "out".
**
**   out   - destination buffer; may be the same as "in".
**   in    - string to be edited.
**   ctrl  - list of editing keywords.
**   ln    - on entry, size of "out" in bytes; on return, length of result
**           (0 if an error is returned).
**
**   Returns "out", or NULL on error (null argument, *ln < 1, unknown
**   keyword, odd number of non-escaped quotes, or no memory). An empty
**   "in" gives an empty result without "ctrl" being examined.
*/
char *StrEdit(
/*    =======
*/           char *out,
             char *in,
             char *ctrl,
             int *ln)
{
    struct stredit_ops ops;
    size_t out_size, out_len, nquotes, idx;
    char *work, *seg;

    if (ln == NULL)
        return NULL;
    if (*ln < 1 || out == NULL) {
        *ln = 0;
        return NULL;                    /* Can't do anything!! */
    }
    out_size = (size_t) *ln;

    if (in == NULL) {
        *out = NIL;
        *ln = 0;
        return NULL;
    }
    if (*in == NIL) {
        *out = NIL;
        *ln = 0;
        return out;                     /* Nothing to do!! */
    }

    /*
    ** Scan ctrl looking to see what has to be done.
    */
    if (ctrl == NULL || stredit_parse_ctrl(ctrl, &ops) != 0) {
        *out = NIL;
        *ln = 0;
        return NULL;
    }

    /*
    ** Copy "in" to a working buffer, processing any '\' escape sequences
    ** and splitting it at non-escaped quotes. This must be complete before
    ** anything is written to "out", since "out" may be the same as "in".
    */
    work = malloc(strlen(in) + 1);      /* Get some working space */
    if (work == NULL) {
        *out = NIL;
        *ln = 0;
        return NULL;
    }
    nquotes = stredit_unescape(in, work);

    *out = NIL;
    out_len = 0;

    /*
    ** Ensure "in" has an even number of non-escaped quotes. Return if not.
    */
    if ((nquotes & 1u) != 0) {
        free(work);
        *ln = 0;
        return NULL;
    }

    /*
    ** Work through the segments. Even-numbered ones were outside quotes
    ** and get edited; odd-numbered ones were quoted and are copied as is.
    */
    seg = work;
    for (idx = 0; idx <= nquotes; idx++) {
        /* Locate the next segment first: editing may shorten this one */
        char *next = seg + strlen(seg) + 1;
        int stop = False;

        if ((idx & 1u) == 0)
            stop = stredit_edit_segment(seg, &ops);

        out_len = stredit_append(out, out_len, out_size, seg);
        if (stop)
            break;                      /* Rest of input is a comment */
        seg = next;
    }
    free(work);

    if (ops.trim) {
        size_t lead;

        while (out_len > 0 &&
               (out[out_len - 1] == ' ' || out[out_len - 1] == '\t'))
            out_len--;
        out[out_len] = NIL;

        lead = strspn(out, " \t");
        if (lead > 0) {
            out_len -= lead;
            memmove(out, out + lead, out_len + 1);  /* Includes the NIL */
        }
    }

    *ln = (int) out_len;
    return out;
}
/*-------------------------------------------------- End of StrEdit.C -------*/