From 4d4294caf728fb32307c506cb18afc90d4b70afa Mon Sep 17 00:00:00 2001 From: Chet Ramey Date: Sun, 8 Aug 2021 20:44:28 -0400 Subject: [PATCH] globbing reacts to signals more quickly; new printf %Q format specifier; readline completion handles multiline quotes better --- CWRU/CWRU.chlog | 32 ++++++++++++++++++++++++++++++++ builtins/printf.def | 34 +++++++++++++++++++++++++++++++--- doc/bash.1 | 10 +++++++--- doc/bashref.texi | 5 ++++- doc/version.texi | 6 +++--- lib/glob/glob.c | 4 ++-- lib/glob/sm_loop.c | 5 +++++ subst.c | 36 ++++++++++++++++++++++++++---------- 8 files changed, 110 insertions(+), 22 deletions(-) diff --git a/CWRU/CWRU.chlog b/CWRU/CWRU.chlog index 4421b986..4d87af74 100644 --- a/CWRU/CWRU.chlog +++ b/CWRU/CWRU.chlog @@ -1643,3 +1643,35 @@ builtins/enable.def doc/{bash.1,bashref.texi} - enable: document new behavior of `enable NAME' when NAME is not a current shell builtin + + 8/3 + --- +lib/glob/sm_loop.c + - GMATCH: check for interrupts or terminating signals each time through + the loop and return FNM_NOMATCH immediately if received. Let the + higher layers deal with interrupting the match and jumping out. + Inspired by a report from andrej@podzimek.org + + 8/6 + --- +subst.c + - {parameter_brace_remove_pattern,parameter_brace_transform, + parameter_brace_substring,parameter_brace_casemod, + parameter_brace_patsub}: make sure the IND argument is of type + arrayind_t to handle the full range of subscripts. Reported by + felix@f-hauri.ch + +builtins/printf.def + - printf_builtin: take new format specifier: %Q. This acts like %q + but applies any supplied precision to the original unquoted + argument, then quotes that result and outputs the entire quoted + string, preserving the original field width. Originally suggested + back in 4/2020 by Sam Liddicott + +subst.c + - char_is_quoted: check whether or not we are on the second or later + line of a command with an unclosed quoted string from a previous + line. 
If we are, see if it's closed before the character we're + checking, so we don't interpret a closing quote as starting a new + quoted string. Reported several times, most recently by + Phi Debian in 6/2021. diff --git a/builtins/printf.def b/builtins/printf.def index 5605bedb..38b6fa89 100644 --- a/builtins/printf.def +++ b/builtins/printf.def @@ -40,6 +40,8 @@ printf interprets: %b expand backslash escape sequences in the corresponding argument %q quote the argument in a way that can be reused as shell input + %Q like %q, but apply any precision to the unquoted argument before + quoting %(fmt)T output the date-time string resulting from using FMT as a format string for strftime(3) @@ -245,7 +247,7 @@ printf_builtin (list) { int ch, fieldwidth, precision; int have_fieldwidth, have_precision; - char convch, thisch, nextch, *format, *modstart, *fmt, *start; + char convch, thisch, nextch, *format, *modstart, *precstart, *fmt, *start; #if defined (HANDLE_MULTIBYTE) char mbch[25]; /* 25 > MB_LEN_MAX, plus can handle 4-byte UTF-8 and large Unicode characters*/ int mbind, mblen; @@ -338,6 +340,7 @@ printf_builtin (list) { precision = fieldwidth = 0; have_fieldwidth = have_precision = 0; + precstart = 0; if (*fmt == '\\') { @@ -411,6 +414,8 @@ printf_builtin (list) if (*fmt == '-') #endif fmt++; + if (DIGIT (*fmt)) + precstart = fmt; while (DIGIT (*fmt)) fmt++; } @@ -580,12 +585,27 @@ printf_builtin (list) } case 'q': /* print with shell quoting */ + case 'Q': { char *p, *xp; - int r; + int r, mpr; + size_t slen; r = 0; p = getstr (); + /* Decode precision and apply it to the unquoted string. */ + if (convch == 'Q' && precstart) + { + mpr = *precstart++ - '0'; + while (DIGIT (*precstart)) + mpr = (mpr * 10) + (*precstart++ - '0'); + /* Error if precision > INT_MAX here? */ + precision = (mpr < 0 || mpr > INT_MAX) ? INT_MAX : mpr; + slen = strlen (p); + /* printf precision works in bytes. 
*/ + if (precision < slen) + p[precision] = '\0'; + } if (p && *p == 0) /* XXX - getstr never returns null */ xp = savestring ("''"); else if (ansic_shouldquote (p)) @@ -594,6 +614,12 @@ printf_builtin (list) xp = sh_backslash_quote (p, 0, 3); if (xp) { + if (convch == 'Q') + { + slen = strlen (xp); + if (slen > precision) + precision = slen; + } /* Use printstr to get fieldwidth and precision right. */ r = printstr (start, xp, strlen (xp), fieldwidth, precision); if (r < 0) @@ -767,7 +793,7 @@ printstr (fmt, string, len, fieldwidth, precision) fw = (mfw < 0 || mfw > INT_MAX) ? INT_MAX : mfw; } - /* get precision, if present */ + /* get precision, if present. doesn't handle negative precisions */ if (*fmt == '.') { fmt++; @@ -783,6 +809,8 @@ printstr (fmt, string, len, fieldwidth, precision) mpr = (mpr * 10) + (*fmt++ - '0'); /* Error if precision > INT_MAX here? */ pr = (mpr < 0 || mpr > INT_MAX) ? INT_MAX : mpr; + if (pr < precision && precision < INT_MAX) + pr = precision; /* XXX */ } else pr = 0; /* "a null digit string is treated as zero" */ diff --git a/doc/bash.1 b/doc/bash.1 index 3f47428c..3ebe0693 100644 --- a/doc/bash.1 +++ b/doc/bash.1 @@ -5,12 +5,12 @@ .\" Case Western Reserve University .\" chet.ramey@case.edu .\" -.\" Last Change: Fri Jul 30 15:05:43 EDT 2021 +.\" Last Change: Fri Aug 6 14:30:46 EDT 2021 .\" .\" bash_builtins, strip all but Built-Ins section .if \n(zZ=1 .ig zZ .if \n(zY=1 .ig zY -.TH BASH 1 "2021 July 30" "GNU Bash 5.1" +.TH BASH 1 "2021 August 6" "GNU Bash 5.1" .\" .\" There's some problem with having a `@' .\" in a tagged paragraph with the BSD man macros. @@ -1223,7 +1223,7 @@ and \fBTEXTDOMAIN\fP shell variables. If the current locale is \fBC\fP or \fBPOSIX\fP, or if there are no translations available, the dollar sign is ignored. -This is a form of quoting, so the string always remains double-quoted, +This is a form of double quoting, so the string remains double-quoted, whether or not it is translated and replaced. 
.SH PARAMETERS A @@ -9250,6 +9250,10 @@ in the same way as \fBecho \-e\fP. causes \fBprintf\fP to output the corresponding \fIargument\fP in a format that can be reused as shell input. .TP +.B %Q +like \fB%q\fP, but applies any supplied precision to the \fIargument\fP +before quoting it. +.TP .B %(\fIdatefmt\fP)T causes \fBprintf\fP to output the date-time string resulting from using \fIdatefmt\fP as a format string for \fIstrftime\fP(3). diff --git a/doc/bashref.texi b/doc/bashref.texi index 084f3a99..e3fb377c 100644 --- a/doc/bashref.texi +++ b/doc/bashref.texi @@ -549,7 +549,7 @@ If the current locale is @code{C} or @code{POSIX}, or if there are no translations available, the dollar sign is ignored, and the shell doesn't attempt to translate the string. -Since this is a form of quoting, the string always remains double-quoted, +Since this is a form of double quoting, the string remains double-quoted, whether or not it is translated and replaced. The rest of this section is a brief overview of how you use gettext to @@ -4687,6 +4687,9 @@ corresponding @var{argument} in the same way as @code{echo -e} @item %q Causes @code{printf} to output the corresponding @var{argument} in a format that can be reused as shell input. +@item %Q +like @code{%q}, but applies any supplied precision to the @var{argument} +before quoting it. @item %(@var{datefmt})T Causes @code{printf} to output the date-time string resulting from using @var{datefmt} as a format string for @code{strftime}(3). diff --git a/doc/version.texi b/doc/version.texi index 4dae9204..ce4d6225 100644 --- a/doc/version.texi +++ b/doc/version.texi @@ -2,10 +2,10 @@ Copyright (C) 1988-2021 Free Software Foundation, Inc. 
@end ignore -@set LASTCHANGE Fri Jul 30 15:04:53 EDT 2021 +@set LASTCHANGE Fri Aug 6 14:31:16 EDT 2021 @set EDITION 5.1 @set VERSION 5.1 -@set UPDATED 30 July 2021 -@set UPDATED-MONTH July 2021 +@set UPDATED 6 August 2021 +@set UPDATED-MONTH August 2021 diff --git a/lib/glob/glob.c b/lib/glob/glob.c index 590370c7..5aa34794 100644 --- a/lib/glob/glob.c +++ b/lib/glob/glob.c @@ -956,7 +956,7 @@ glob_vector (pat, dir, flags) /* compat: if GX_ADDCURDIR, add the passed directory also. Add an empty directory name as a placeholder if GX_NULLDIR (in which case the passed directory name is "."). */ - if (add_current) + if (add_current && lose == 0) { sdlen = strlen (dir); nextname = (char *)malloc (sdlen + 1); @@ -986,7 +986,7 @@ glob_vector (pat, dir, flags) lose |= name_vector == NULL; } - /* Have we run out of memory? */ + /* Have we run out of memory or been interrupted? */ if (lose) { tmplink = 0; diff --git a/lib/glob/sm_loop.c b/lib/glob/sm_loop.c index e9dd3c56..247ba28a 100644 --- a/lib/glob/sm_loop.c +++ b/lib/glob/sm_loop.c @@ -16,6 +16,8 @@ along with Bash. If not, see <http://www.gnu.org/licenses/>. */ +extern int interrupt_state, terminating_signal; + struct STRUCT { CHAR *pattern; @@ -81,6 +83,9 @@ fprintf(stderr, "gmatch: pattern = %s; pe = %s\n", pattern, pe); sc = n < se ? *n : '\0'; + if (interrupt_state || terminating_signal) + return FNM_NOMATCH; + #ifdef EXTENDED_GLOB /* EXTMATCH () will handle recursively calling GMATCH, so we can just return what EXTMATCH() returns.
*/ diff --git a/subst.c b/subst.c index fdb79b28..aa2f7586 100644 --- a/subst.c +++ b/subst.c @@ -287,7 +287,7 @@ static char *parameter_list_remove_pattern PARAMS((int, char *, int, int)); #ifdef ARRAY_VARS static char *array_remove_pattern PARAMS((SHELL_VAR *, char *, int, int, int)); #endif -static char *parameter_brace_remove_pattern PARAMS((char *, char *, int, char *, int, int, int)); +static char *parameter_brace_remove_pattern PARAMS((char *, char *, arrayind_t, char *, int, int, int)); static char *string_var_assignment PARAMS((SHELL_VAR *, char *)); #if defined (ARRAY_VARS) @@ -300,7 +300,7 @@ static char *parameter_list_transform PARAMS((int, int, int)); #if defined ARRAY_VARS static char *array_transform PARAMS((int, SHELL_VAR *, int, int)); #endif -static char *parameter_brace_transform PARAMS((char *, char *, int, char *, int, int, int, int)); +static char *parameter_brace_transform PARAMS((char *, char *, arrayind_t, char *, int, int, int, int)); static int valid_parameter_transform PARAMS((char *)); static char *process_substitute PARAMS((char *, int)); @@ -328,16 +328,16 @@ static char *skiparith PARAMS((char *, int)); static int verify_substring_values PARAMS((SHELL_VAR *, char *, char *, int, intmax_t *, intmax_t *)); static int get_var_and_type PARAMS((char *, char *, arrayind_t, int, int, SHELL_VAR **, char **)); static char *mb_substring PARAMS((char *, int, int)); -static char *parameter_brace_substring PARAMS((char *, char *, int, char *, int, int, int)); +static char *parameter_brace_substring PARAMS((char *, char *, arrayind_t, char *, int, int, int)); static int shouldexp_replacement PARAMS((char *)); static char *pos_params_pat_subst PARAMS((char *, char *, char *, int)); -static char *parameter_brace_patsub PARAMS((char *, char *, int, char *, int, int, int)); +static char *parameter_brace_patsub PARAMS((char *, char *, arrayind_t, char *, int, int, int)); static char *pos_params_casemod PARAMS((char *, char *, int, int)); -static char 
*parameter_brace_casemod PARAMS((char *, char *, int, int, char *, int, int, int)); +static char *parameter_brace_casemod PARAMS((char *, char *, arrayind_t, int, char *, int, int, int)); static WORD_DESC *parameter_brace_expand PARAMS((char *, int *, int, int, int *, int *)); static WORD_DESC *param_expand PARAMS((char *, int *, int, int *, int *, int *, int *, int)); @@ -2173,6 +2173,21 @@ char_is_quoted (string, eindex) oldjmp = no_longjmp_on_fatal_error; no_longjmp_on_fatal_error = 1; i = pass_next = 0; + + /* If we have an open quoted string from a previous line, see if it's + closed before string[eindex], so we don't interpret that close quote + as starting a new quoted string. */ + if (current_command_line_count > 0 && dstack.delimiter_depth > 0) + { + c = dstack.delimiters[dstack.delimiter_depth - 1]; + if (c == '\'') + i = skip_single_quoted (string, slen, 0, 0); + else if (c == '"') + i = skip_double_quoted (string, slen, 0, SX_COMPLETE); + if (i > eindex) + CQ_RETURN (1); + } + while (i <= eindex) { c = string[i]; @@ -5363,7 +5378,7 @@ array_remove_pattern (var, pattern, patspec, starsub, quoted) static char * parameter_brace_remove_pattern (varname, value, ind, patstr, rtype, quoted, flags) char *varname, *value; - int ind; + arrayind_t ind; char *patstr; int rtype, quoted, flags; { @@ -8053,7 +8068,7 @@ valid_parameter_transform (xform) static char * parameter_brace_transform (varname, value, ind, xform, rtype, quoted, pflags, flags) char *varname, *value; - int ind; + arrayind_t ind; char *xform; int rtype, quoted, pflags, flags; { @@ -8190,7 +8205,7 @@ mb_substring (string, s, e) static char * parameter_brace_substring (varname, value, ind, substr, quoted, pflags, flags) char *varname, *value; - int ind; + arrayind_t ind; char *substr; int quoted, pflags, flags; { @@ -8512,7 +8527,7 @@ pos_params_pat_subst (string, pat, rep, mflags) static char * parameter_brace_patsub (varname, value, ind, patsub, quoted, pflags, flags) char *varname, *value; - int 
ind; + arrayind_t ind; char *patsub; int quoted, pflags, flags; { @@ -8749,7 +8764,8 @@ pos_params_modcase (string, pat, modop, mflags) static char * parameter_brace_casemod (varname, value, ind, modspec, patspec, quoted, pflags, flags) char *varname, *value; - int ind, modspec; + arrayind_t ind; + int modspec; char *patspec; int quoted, pflags, flags; {