diff --git a/CWRU/CWRU.chlog b/CWRU/CWRU.chlog index 9c349e90..4f571da0 100644 --- a/CWRU/CWRU.chlog +++ b/CWRU/CWRU.chlog @@ -11776,7 +11776,7 @@ lib/sh/strtrans.c builtins/read.def - read_builtin: make sure i is >= 0 after a timeout longjmp before trying to terminate input_string - From a report from Duncan Roe + From a report from Duncan Roe jobs.c,jobs.h - wait_for_background_pids: now takes a new first argument, WFLAGS. @@ -12785,7 +12785,7 @@ sig.c --- builtins/psize.c - sigpipe: work around cygwin SIGPIPE delivery bug - Report and fix from Duncan Roe + Report and fix from Duncan Roe 3/10 ---- @@ -12831,3 +12831,27 @@ builtins/type.def,builtins/complete.def,builtins/alias.def,builtins/type.def print_cmd.c - check for possible $'...' quoting and use it if appropriate instead of just calling sh_single_quote() + + 4/21 + ---- +examples/loadables/rev.c + - new loadable builtin from Duncan Roe + + 4/23 + ---- +trap.c + - run_interrupt_trap: set catch_flag depending on whether or not there + are any pending traps; don't set it to 0 unconditionally because we + haven't run through all the signals + Report and fix from František Šumšal + + 4/27 + ---- +command.h + - W_DQUOTE (unused) -> W_SPLITONLY (future use) + +subst.c + - list_string: now takes a set of word flags as the third argument; + old `quoted' is now (flags & W_QUOTED); changed all callers + appropriately + diff --git a/MANIFEST b/MANIFEST index e2b5ead6..40a8739f 100644 --- a/MANIFEST +++ b/MANIFEST @@ -788,6 +788,7 @@ examples/loadables/fdflags.c f examples/loadables/finfo.c f examples/loadables/fltexpr.c f examples/loadables/jobid.c f +examples/loadables/rev.c f examples/loadables/cat.c f examples/loadables/chmod.c f examples/loadables/csv.c f diff --git a/command.h b/command.h index 1070d5b3..1c565cf7 100644 --- a/command.h +++ b/command.h @@ -92,7 +92,7 @@ enum command_type { cm_for, cm_case, cm_while, cm_if, cm_simple, cm_select, #define W_ASSNBLTIN (1 << 16) /* word is a builtin command that takes 
assignments */ #define W_ASSIGNARG (1 << 17) /* word is assignment argument to command */ #define W_HASQUOTEDNULL (1 << 18) /* word contains a quoted null character */ -#define W_DQUOTE (1 << 19) /* UNUSED - word should be treated as if double-quoted */ +#define W_SPLITONLY (1 << 19) /* word should be split but not undergo quoted null removal */ #define W_NOPROCSUB (1 << 20) /* don't perform process substitution */ #define W_SAWQUOTEDNULL (1 << 21) /* word contained a quoted null that was removed */ #define W_ASSIGNASSOC (1 << 22) /* word looks like associative array assignment */ diff --git a/examples/loadables/Makefile.in b/examples/loadables/Makefile.in index 6e8b635b..c01b50c6 100644 --- a/examples/loadables/Makefile.in +++ b/examples/loadables/Makefile.in @@ -259,6 +259,8 @@ fltexpr: fltexpr.o jobid: jobid.o $(SHOBJ_LD) $(SHOBJ_LDFLAGS) $(SHOBJ_XLDFLAGS) -o $@ jobid.o $(SHOBJ_LIBS) +rev: rev.o + $(SHOBJ_LD) $(SHOBJ_LDFLAGS) $(SHOBJ_XLDFLAGS) -o $@ rev.o $(SHOBJ_LIBS) # pushd is a special case. We use the same source that the builtin version # uses, with special compilation options. 
@@ -325,7 +327,7 @@ OBJS = print.o truefalse.o accept.o sleep.o finfo.o getconf.o logname.o \ basename.o dirname.o tty.o pathchk.o tee.o head.o rmdir.o necho.o \ hello.o cat.o csv.o dsv.o kv.o cut.o printenv.o id.o whoami.o uname.o \ sync.o push.o mkdir.o mktemp.o realpath.o strftime.o setpgid.o stat.o \ - fdflags.o seq.o asort.o strptime.o chmod.o fltexpr.o jobid.o + fdflags.o seq.o asort.o strptime.o chmod.o fltexpr.o jobid.o rev.o ${OBJS}: ${BUILD_DIR}/config.h @@ -369,3 +371,4 @@ asort.o: asort.c strptime.o: strptime.c fltexpr.o: fltexpr.c jobid.o: jobid.c +rev.o: rev.c diff --git a/examples/loadables/rev.c b/examples/loadables/rev.c new file mode 100644 index 00000000..b8d7d4d8 --- /dev/null +++ b/examples/loadables/rev.c @@ -0,0 +1,302 @@ +/* rev - reverse lines in a file or files character by character */ + +/* + * Copyright (c) 1987, 1992 The Regents of the University of California. + * Copyright (C) 2026 Free Software Foundation, Inc. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see . + * + * Modified for Linux by Charles Hannum (mycroft@gnu.ai.mit.edu) + * and Brian Koehmstedt (bpk@gnu.ai.mit.edu) + * + * Wed Sep 14 22:26:00 1994: Patch from bjdouma to handle + * last line that has no newline correctly. + * 3-Jun-1998: Patched by Nicolai Langfeldt to work better on Linux: + * Handle any-length-lines. 
/* ********************************* getlen ********************************* */

/*
 * Get the length of the UTF-8 sequence that ends at LAST_TRLG_BYTE.
 *
 * last_trlg_byte  points at the byte being examined (the caller only passes
 *                 bytes with the high bit set).
 * num_bytes_left  number of bytes from the start of the line up to AND
 *                 INCLUDING *last_trlg_byte, so only num_bytes_left - 1
 *                 bytes may be examined in front of it.
 *
 * Returns the length (2, 3 or 4) of the sequence if *last_trlg_byte is the
 * final trailing byte of a well-formed header + trailer(s) group that lies
 * entirely inside the line.  Otherwise returns 1, so the caller copies the
 * byte unchanged.
 *
 * Trailing bytes start '10'b and carry 6 bits of data.  The header byte
 * starts with as many 1 bits as there are bytes in the sequence, followed by
 * a 0 bit; e.g. a 4-byte sequence starts '11110'b.
 */
static int
getlen(char *last_trlg_byte, int num_bytes_left)
{
  /* mask[n] selects the leading n+1 bits; mask[n - 1] is the bit pattern a
     header byte of an n-byte sequence has under mask[n]. */
  static const unsigned char mask[5] = { 0200, 0300, 0340, 0360, 0370 };
  const unsigned char *p = (const unsigned char *)last_trlg_byte;
  int avail;			/* bytes we may still look at before *p */
  int n;			/* bytes in header + trailer(s) */

  if ((*p & mask[1]) != mask[0])
    return 1;			/* not a trailing byte at all */

  /* The current byte is already accounted for in num_bytes_left; never look
     further back than the first byte of the line, and never more than the
     3 bytes a 4-byte sequence needs. */
  avail = num_bytes_left - 1;
  if (avail > 3)
    avail = 3;

  for (n = 2; avail > 0; avail--, n++)
    {
      p--;
      if ((*p & mask[1]) != mask[0])
	/* First non-trailing byte: it must be the header for exactly N bytes. */
	return ((*p & mask[n]) == mask[n - 1]) ? n : 1;
    }

  /* Ran out of line (or saw more than 3 trailing bytes) without a header. */
  return 1;
} /* getlen() */
+ */ +{ + + const char mask[5] = { 0200, 0300, 0340, 0360, 0370 }; + char *p = last_trlg_byte; + int n; /* Bytes in header + trailer(s) */ + int i; + + if ((*p-- & mask[1]) != mask[0]) + goto not_utf_8; + n = 2; + for (i = num_bytes_left >= 3 ? 3 : num_bytes_left; i > 0; i--, p--, n++) + { /* 3 more bytes max */ + if ((*p & mask[1]) != mask[0]) + { + if ((*p & mask[n]) == mask[n - 1]) + return n; + else + goto not_utf_8; + } /* if ((*p-- & mask[1]) != mask[0]) */ + } + +not_utf_8: + return 1; +} /* getlen() */ + +/* ****************************** reverse_line ****************************** */ + +static void +reverse_line(SHELL_VAR *v, arrayind_t *ind, char *line, size_t len, + int outputsep, char sep) +{ + char *p, *q; + char *buf; + int i, j; +#if defined (ARRAY_VARS) + if (v) + { + /* + * Bypass extra copies and malloc / free calls by getting a shell var + * with NULL value and putting an allocated buffer in it. + */ + bind_array_element (v, (*ind)++, (char *)NULL, 0); + buf = xmalloc(len + 1); /* +1 for NUL */ + (((ARRAY *)v->value)->lastref)->value = buf; + buf[len] = '\0'; + } /* if (v) */ +#endif + + if (locale_utf8locale) + { + for (i = len, p = line + len - 1; i > 0; i--, p--) + { + if (*p & 0200) + { + j = getlen(p, i); + p = q = p - (j - 1); /* p-> 1st byte of seq */ + i -= (j - 1); /* Reduce num left by num trlg bytes */ + for (; j > 0; j--) + PUTC(*q++); + } /* if (*p & 0200) */ + else + PUTC(*p); + } /* for (i = len, p = line + len - 1; i > 0; i--) */ + } /* if (locale_utf8locale) */ + else + { + for (i = len, p = line + len - 1; i > 0; i--) + PUTC(*p--); + } /* if (locale_utf8locale) else */ + if (outputsep) + PUTC(sep); +} /* reverse_line() */ + +/* ****************************** rev_internal ****************************** */ + +static int +rev_internal(WORD_LIST *list) +{ + int unbuffered_read; + char *array_name; + arrayind_t ind; + int outputsep; + WORD_LIST *l; + SHELL_VAR *v; + size_t llen; + char *line; + size_t n; + int rval; + char sep; + int 
opt; + int fd; + + v = 0; + rval = EXECUTION_SUCCESS; + + array_name = 0; + sep = '\n'; + ind = 0; + + reset_internal_getopt(); + while ((opt = internal_getopt(list, "0:a:h")) != -1) + switch (opt) + { + case '0': + sep = '\0'; + break; + case 'a': +#if defined (ARRAY_VARS) + array_name = list_optarg; + break; +#else + builtin_error("arrays not available"); + return (EX_USAGE); +#endif + CASE_HELPOPT; + default: + builtin_usage(); + return (EX_USAGE); + } + + if (array_name && (valid_identifier(array_name) == 0)) + { + sh_invalidid(array_name); + return (EXECUTION_FAILURE); + } + +#if defined (ARRAY_VARS) + if (array_name) + { + v = builtin_find_indexed_array(array_name, 1); + if (v == 0) + return (EXECUTION_FAILURE); + } +#endif + + l = loptend; + line = 0; + llen = 0; + + do + { +/* for each file */ + + if (l == 0) + fd = 0; + else + SYSCALL(fd, open(l->word->word, O_RDONLY)); + if (fd == -1) + { + file_error(l->word->word); + rval = EXECUTION_FAILURE; + goto next_file; + } + +#ifndef __CYGWIN__ + unbuffered_read = (lseek(fd, 0L, SEEK_CUR) < 0) && (errno == ESPIPE); +#else + unbuffered_read = 1; +#endif + +/* Read from input */ + while ((n = zgetline(fd, &line, &llen, sep, unbuffered_read)) != -1) + { + QUIT; + if (line[n] == sep) + outputsep = 1; + else + { + outputsep = 0; + n++; /* Work around zgetline behaviour on unterminated line */ + } + reverse_line(v, &ind, line, n, outputsep, sep); + } /* while ((n = zgetline(...) 
/* Long documentation strings for the `rev' builtin, one line of text per
   element, terminated by a NULL pointer.  Every element must be followed by
   a comma: adjacent string literals are concatenated by the compiler, which
   silently merges two lines of help text into one. */
char *rev_doc[] = {
	"Reverse lines characterwise.",
	"",
	"Copy the lines of the specified files to standard output,",
	"or assign them to the indexed array ARRAY starting at index 0,",
	"reversing the order of characters in every line.",
	"If no files are specified, standard input is read.",
	"",
	"When -0 is specified, use the byte '\\0' as line separator.",
	"",
	"When -a is specified, assign each reversed line",
	"to successive elements of ARRAY,",
	"beginning at 0.",
	"The lines rev assigns to ARRAY are identical to the lines it would",
	"write to the standard output if -a were not supplied.",
	"",
	"This utility processes UTF-8 without using a wide-character buffer.",
	(char *)NULL
};
+ */ if ((U_CHAR)c != FOLD (sc)) return (FNM_NOMATCH); } diff --git a/lib/sh/zgetline.c b/lib/sh/zgetline.c index 79db1ce1..027d6ba7 100644 --- a/lib/sh/zgetline.c +++ b/lib/sh/zgetline.c @@ -43,7 +43,7 @@ typedef ssize_t breadfunc_t (int, char *, size_t); typedef ssize_t creadfunc_t (int, char *); /* Initial memory allocation for automatic growing buffer in zreadlinec */ -#define GET_LINE_INITIAL_ALLOCATION 64 +#define GET_LINE_INITIAL_ALLOCATION 128 /* Derived from GNU libc's getline. The behavior is almost the same as getline. See man getline. diff --git a/lib/sh/zread.c b/lib/sh/zread.c index 9246d1e2..a1862165 100644 --- a/lib/sh/zread.c +++ b/lib/sh/zread.c @@ -28,6 +28,7 @@ #include #include +#include #if !defined (errno) extern int errno; @@ -85,6 +86,15 @@ zbufpush(int c) return 1; } +static inline int +zbufpeek (void) +{ + if (zpushind == zpopind) + return (0); + return zpushbuf[zpopind]; +} + + /* Add C to the pushback buffer. Can't push back EOF */ int zungetc (int c) @@ -281,6 +291,26 @@ zreadn (int fd, char *cp, size_t len) return 1; } +/* `Peek' in the read buffer for DELIM and return the number of characters to + read to get to DELIM. Just a skeleton for now. */ +size_t +zpeekfd (int fd, int delim) +{ + int c; + ssize_t len; + char *t; + + if ((c = zbufpeek ()) == delim) + return 1; + len = lused - lind; + if (len <= 0) + return 0; /* not found, need to read more */ + t = memchr (lbuf + lind, delim, len); + if (t != NULL) + return (t - lbuf - lind); + return 0; /* not found, read more and let the buffer refill */ +} + void zreset (void) { diff --git a/sig.c b/sig.c index 1b8dda65..980191f2 100644 --- a/sig.c +++ b/sig.c @@ -607,9 +607,12 @@ termsig_handler (int sig) if (sig == SIGPIPE && builtin_catch_sigpipe) sigpipe_handler (sig); - /* I don't believe this condition ever tests true. */ + /* I don't believe this condition ever tests true, so print a message if it does. 
*/ if (sig == SIGINT && signal_is_trapped (SIGINT)) - run_interrupt_trap (0); + { + INTERNAL_DEBUG (("termsig_handler: running SIGINT trap")); + run_interrupt_trap (0); + } #if defined (HISTORY) /* If we don't do something like this, the history will not be saved when diff --git a/subst.c b/subst.c index 5f95a278..37014c5e 100644 --- a/subst.c +++ b/subst.c @@ -425,10 +425,10 @@ dump_word_flags (int flags) f &= ~W_NOPROCSUB; fprintf (stderr, "W_NOPROCSUB%s", f ? "|" : ""); } - if (f & W_DQUOTE) + if (f & W_SPLITONLY) { - f &= ~W_DQUOTE; - fprintf (stderr, "W_DQUOTE%s", f ? "|" : ""); + f &= ~W_SPLITONLY; + fprintf (stderr, "W_SPLITONLY%s", f ? "|" : ""); } if (f & W_HASQUOTEDNULL) { @@ -3177,11 +3177,12 @@ string_list_pos_params (int pchar, WORD_LIST *list, int quoted, int pflags) : ifs_whitespace (c)) WORD_LIST * -list_string (char *string, char *separators, int quoted) +list_string (char *string, char *separators, int flags) { WORD_LIST *result; WORD_DESC *t; char *current_word, *s; + int quoted; int sh_style_split, whitesep, xflags, free_word; size_t sindex; size_t slen; @@ -3189,6 +3190,8 @@ list_string (char *string, char *separators, int quoted) if (!string || !*string) return ((WORD_LIST *)NULL); + quoted = flags & W_QUOTED; + sh_style_split = separators && separators[0] == ' ' && separators[1] == '\t' && separators[2] == '\n' && @@ -9103,6 +9106,15 @@ parameter_brace_transform (char *varname, char *value, array_eltstate_t *estatep if ((xc == 'a' || xc == 'A') && vtype == VT_VARIABLE && varname && v == 0) v = find_variable (varname); +#if 0 /*TAG:bash-5.4 https://lists.gnu.org/archive/html/bug-bash/2026-03/msg00051.html 3/15/2026 */ + /* something like ${x[1]@A} should be an error */ + if (xc == 'A' && vtype == VT_ARRAYMEMBER && v && estatep->type == ARRAY_INDEXED && estatep->subtype == 0) + { + this_command_name = oname; + return (interactive_shell ? 
&expand_param_error : &expand_param_fatal); + } +#endif + temp1 = (char *)NULL; /* shut up gcc */ switch (vtype) { @@ -12230,7 +12242,7 @@ finished_with_string: } else if (word->flags & W_ASSIGNRHS) { - list = list_string (istring, "", quoted); + list = list_string (istring, "", quoted ? W_QUOTED : 0); tword = list->word; if (had_quoted_null && QUOTED_NULL (istring)) tword->flags |= W_HASQUOTEDNULL; @@ -12262,9 +12274,9 @@ finished_with_string: the individual words on $' \t\n'. We rely on previous steps to quote the portions of the word that should not be split */ if (ifs_is_set == 0) - list = list_string (istring, " \t\n", 1); /* XXX quoted == 1? */ + list = list_string (istring, " \t\n", W_QUOTED); /* XXX quoted == 1? */ else - list = list_string (istring, " ", 1); /* XXX quoted == 1? */ + list = list_string (istring, " ", W_QUOTED); /* XXX quoted == 1? */ } /* If we have $@ (has_dollar_at != 0) and we are in a context where we @@ -12286,7 +12298,7 @@ finished_with_string: need it to get the space separation right if space isn't the first character in IFS (but is present) and to remove the quoting we added back in param_expand(). */ - list = list_string (istring, *ifs_chars ? ifs_chars : " ", 1); + list = list_string (istring, *ifs_chars ? ifs_chars : " ", W_QUOTED); /* This isn't exactly right in the case where we're expanding the RHS of an expansion like ${var-$@} where IFS=: (for example). The W_NOSPLIT2 means we do the separation with :; @@ -12307,7 +12319,7 @@ finished_with_string: goto set_word_flags; } else if (has_dollar_at && ifs_chars) - list = list_string (istring, *ifs_chars ? ifs_chars : " ", 1); + list = list_string (istring, *ifs_chars ? 
/* Evaluates to non-zero when first_pending_trap () reports a pending trap
   (i.e. returns something other than -1).  The replacement list is fully
   parenthesized so the macro behaves as a single expression under `!',
   `&&', `==' and similar operators. */
#define any_pending_traps() (first_pending_trap () != -1)