From b9d10c5a9ce204a67bce0c62eefb81e9890f12fc Mon Sep 17 00:00:00 2001 From: Martijn Dekker Date: Fri, 11 Sep 2020 09:33:29 +0200 Subject: [PATCH] Fix 'command' expansion bug and POSIX compliance The 'command' name can now result from an expansion, e.g.: c=command; "$c" ls set -- command ls; "$@" both work now. This fixes BUG_CMDEXPAN. If -o posix is on, 'command' now disables not only the "special" but also the "declaration" properties of builtin commands that it invokes. This is because POSIX specifies 'command' as a simple regular builtin, and any command name following 'command' is just an argument to the 'command' command, so there is nothing that allows any further arguments (such as assignment-arguments) to be treated specially by the parser. So, if and only if -o posix is on: a. Arguments that start with a variable name followed by '=' are always treated as regular words subject to normal shell syntax. b. Since assignment-arguments are not processed as assignments before the command itself, 'command' can now stop the shell from exiting (as required by the standard) if a command that it invokes (such as 'export') tries to modify a readonly variable. This fixes BUG_CMDSPEXIT. Most of 'command' is integrated in the parser and parse tree executer, so that is where it needed fixing. src/cmd/ksh93/sh/parse.c: simple(): - If the posix option is on, do not skip past SYSCOMMAND so that any declaration builtin commands that are arguments to 'command' are not detected and thus not treated specially at parsetime. src/cmd/ksh93/sh/xec.c: sh_exec(): - When detecting SYSCOMMAND in order to skip past it, not only compare the Namval_t pointer 'np' to SYSCOMMAND, but also handle the case where that pointer is NULL, as when the command name results from an expansion. In that case, search the function tree shp->fun_tree for the name and see if that yields the SYSCOMMAND pointer. fun_tree is initialised with a dtview to bltin_tree, so searching fun_tree instead allows for overriding 'command' with a shell function (which the POSIX standard requires us to allow). src/cmd/ksh93/sh.1, src/cmd/ksh93/data/builtins.c: - Update documentation to match these changes. - Various related edits and improvements. src/cmd/ksh93/tests/builtins.sh: - Check that 'command' works if resulting from an expansion. - Check that 'command' can be overridden by a shell function. --- NEWS | 14 +++++ TODO | 14 ----- src/cmd/ksh93/data/builtins.c | 18 +++--- src/cmd/ksh93/include/version.h | 2 +- src/cmd/ksh93/sh.1 | 99 +++++++++++++++++++-------------- src/cmd/ksh93/sh/parse.c | 2 +- src/cmd/ksh93/sh/xec.c | 2 +- src/cmd/ksh93/tests/builtins.sh | 21 +++++++ 8 files changed, 106 insertions(+), 66 deletions(-) diff --git a/NEWS b/NEWS index 8ab758819ddc..87a281a46343 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,20 @@ For full details, see the git log at: https://github.com/ksh93/ksh Any uppercase BUG_* names are modernish shell bug IDs. +2020-09-11: + +- The 'command' regular builtin utility (which runs a simple command, removing + special properties) has been made fully POSIX compliant. + 1. The 'command' name can now result from an expansion (fixing BUG_CMDEXPAN), + e.g. 'c=command; "$c" ls' and 'set -- command ls; "$@"' now work. + 2. If and only if the POSIX mode (the new -o posix shell option) is active, + then the 'command' utility now disables not only "special" but also + "declaration" properties of builtin commands that it invokes, meaning: + a. arguments that start with a variable name followed by '=' are + always treated as regular words subject to normal shell syntax; + b. 'command' can now stop the shell from exiting if a command that it + invokes tries to modify a readonly variable (fixing BUG_CMDSPEXIT). + 2020-09-09: - Fixed BUG_LOOPRET2 and related bugs. The 'exit' and 'return' commands without diff --git a/TODO b/TODO index df46db562976..bf53eca325f8 100644 --- a/TODO +++ b/TODO @@ -24,20 +24,6 @@ https://github.com/modernish/modernish/tree/0.16/lib/modernish/cap/ initial ! (and, on some shells, ^) retains the meaning of negation, even in quoted strings within bracket patterns, including quoted variables. -- BUG_CMDEXPAN: if the 'command' command results from an expansion, it acts - like 'command -v', showing the path of the command instead of executing it. - For example: - v=command; "$v" ls - or - set -- command ls; "$@" - don't work. - See also: https://github.com/att/ast/issues/963 - -- BUG_CMDSPEXIT: preceding a "special builtin"[*] (other than 'eval', 'exec', - 'return' or 'exit') with 'command' does not always stop it from exiting - the shell if the builtin encounters error. - [*] https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_14 - - BUG_CSUBSTDO: If standard output (file descriptor 1) is closed before entering a $(command substitution), and any other file descriptors are redirected within the command substitution, commands such as 'echo' will diff --git a/src/cmd/ksh93/data/builtins.c b/src/cmd/ksh93/data/builtins.c index 71e8555ad467..f203b24fc1f7 100644 --- a/src/cmd/ksh93/data/builtins.c +++ b/src/cmd/ksh93/data/builtins.c @@ -470,18 +470,22 @@ USAGE_LICENSE ; const char sh_optcommand[] = -"[-1c?\n@(#)$Id: command (AT&T Research) 2003-08-01 $\n]" +"[-1c?\n@(#)$Id: command (AT&T Research/ksh93) 2020-09-09 $\n]" USAGE_LICENSE -"[+NAME?command - execute a simple command]" +"[+NAME?command - execute a simple command disabling special properties]" "[+DESCRIPTION?Without \b-v\b or \b-V\b, \bcommand\b executes \acommand\a " "with arguments given by \aarg\a, suppressing the shell function lookup " - "that normally occurs. In addition, if \acommand\a is a special " - "built-in command, then the special properties are removed so that " - "failures will not cause the script that executes it to terminate.]" + "that normally occurs. If \acommand\a is a special built-in command, " + "then the special properties are removed so that failures will not " + "cause the script that executes it to terminate and preceding " + "assignments will not persist beyond the command invocation. " + "If \acommand\a is a declaration built-in command and the " + "\b-o posix\b shell option is on, then the declaration properties are " + "removed so that arguments containing \b=\b are not treated specially.]" "[+?With the \b-v\b or \b-V\b options, \bcommand\b is equivalent to the " "\bwhence\b(1) command.]" -"[p?Causes a default path to be searched rather than the one defined by the " - "value of \bPATH\b.]" +"[p?Instead of \b$PATH\b, search the OS's default utility path as output by " + "\bgetconf PATH\b.]" "[v?Equivalent to \bwhence\b \acommand\a [\aarg\a ...]].]" "[x?If \acommand\a fails because there are too many \aarg\as, it will be " "invoked multiple times with a subset of the arguments on each " diff --git a/src/cmd/ksh93/include/version.h b/src/cmd/ksh93/include/version.h index f24a1351fabb..6e32d374dd55 100644 --- a/src/cmd/ksh93/include/version.h +++ b/src/cmd/ksh93/include/version.h @@ -17,4 +17,4 @@ * David Korn * * * ***********************************************************************/ -#define SH_RELEASE "93u+m 2020-09-09" +#define SH_RELEASE "93u+m 2020-09-11" diff --git a/src/cmd/ksh93/sh.1 b/src/cmd/ksh93/sh.1 index e77a4df1484b..8568aa1973c3 100644 --- a/src/cmd/ksh93/sh.1 +++ b/src/cmd/ksh93/sh.1 @@ -3691,7 +3691,7 @@ are equivalent (as far as the above execution of .I cmd\^ is concerned except for special built-in commands listed below \- those that are -preceded with a dagger). +marked with \fB\(dg\fR). .PP If the obsolete .B \-k @@ -5495,10 +5495,11 @@ Commands that are preceded by a \(dd symbol below are Any following words that are in the format of a variable assignment are expanded with the same rules as a variable assignment. -This means that -tilde substitution is performed after the +This means that tilde substitution is performed after the .B = -sign and field splitting and file name generation are not +sign, array assignments of the form +\f2varname\^\fP\f3=(\fP\f2assign_list\^\fP\f3)\fP +are supported, and field splitting and file name generation are not performed. .PD .TP @@ -5650,7 +5651,7 @@ via the file will associate with the pathname of the directory containing the .B .paths file. -.P +.IP The ISO C/C++ prototype is \f3b_\fP\f2mycommand\fP\f3(int\fP \f2argc\fP, \f3char *\fP\f2argv\fP\f3[]\fP, \f3void *\fP\f2context\fP\f3)\fP for the builtin command @@ -5663,7 +5664,7 @@ elements and context is an optional pointer to a .B Shell_t structure as described in .BR . -.sp .5 +.IP Special built-ins cannot be bound to a pathname or deleted. The .B \-d @@ -5770,6 +5771,20 @@ command may not be executed by .if \nZ=2 .B rksh93\^. .TP \f3command\fP \*(OK \f3\-pvxV\fP \*(CK \f2name\^\fP \*(OK \f2arg\^\fP .\|.\|. \*(CK +With the +.B \-v +option, +.B command +is equivalent to the built-in +.B whence +command described below. +The +.B \-V +option causes +.B command +to act like +.BR "whence \-v" . +.IP Without the .B \-v or @@ -5780,53 +5795,52 @@ executes .I name\^ with the arguments given by .IR arg . -The -.B \-p -option causes -the operating system's standard utilities path -(as output by \f3getconf PATH\fP) to be searched -rather than the one defined by the value of -.SM -.BR PATH . Functions and aliases will not be searched for when finding .IR name . -In addition, if +If .I name\^ refers to a special built-in, -none of the special properties associated with the leading -daggers will be honored. +as marked with \fB\(dg\fR in this manual, +.B command +disables the special properties described above for that mark, +executing the command as a regular built-in. (For example, using -.B "command set -o" +.B "command set \-o" .I "option-name" prevents a script from terminating when an invalid option name is given.) -With the -.B \-x -option, -if command execution would result in a failure because -there are too many arguments, errno -.SM -.BR E2BIG , -the shell will invoke command +If .I name\^ -multiple times with a subset of the arguments on each invocation. -Arguments that occur prior to the first word that -expands to multiple arguments and after the last word -that expands to multiple arguments will be passed on each invocation. -The exit status will be the maximum invocation exit status. -With the -.B \-v -option, -.B command -is equivalent to the built-in -.B whence -command described below. +refers to a declaration built-in, +as marked with \fB\(dd\fR in this manual, +and the \fBposix\fR shell option is on, +then the declaration properties are removed so that arguments containing +\fB=\fR are not treated specially. +.IP The -.B \-V +.B \-p option causes -.B command -to act like -.BR "whence \-v" . +the operating system's standard utilities path +(as output by \f3getconf PATH\fP) to be searched +rather than the one defined by the value of +.SM +.BR PATH . +.IP +The +.B \-x +option allows executing non-built-in commands with argument lists exceeding +limitations imposed by the operating system. This functionality is similar to +.BR xargs (1) +but is easier to use. +If a command cannot ordinarily be executed because there are too many +arguments, the shell will invoke the indicated command multiple times +with a subset of the arguments on each invocation. +Any arguments (such as command options) that come before the first word +that expands to multiple arguments, as well as any that follow the last +word that expands to multiple arguments, are considered static arguments +and are repeated for each invocation. When all invocations are completed, +.B "command \-x" +exits with the status of the invocation that had the highest exit status. .TP \(dd \f3compound\fP \f2vname\fP\*(OK\f3=\fP\f2value\^\fP\*(CK .\|.\|. Causes each @@ -7044,6 +7058,7 @@ is on by default if ksh is invoked as \fBsh\fR. It disables passing exported variables' attributes (such as integer or readonly) to a new ksh process through the environment, causes file descriptors > 2 to be left open when invoking another program, makes the \fB<>\fR redirection operator default to standard input, +causes the \fBcommand\fR utility to disable declaration command properties of any declaration commands it invokes, disables a hack that makes \fBtest -t\fR (\fB[ -t ]\fR) equivalent to \fBtest -t 1\fR (\fB[ -t 1 ]\fR), enables octal numbers in \fBlet\fR shell arithmetic (see \fBletoctal\fR), and disables the \fB&>\fR redirection shorthand. diff --git a/src/cmd/ksh93/sh/parse.c b/src/cmd/ksh93/sh/parse.c index 921b455ca263..1a340eaf6683 100644 --- a/src/cmd/ksh93/sh/parse.c +++ b/src/cmd/ksh93/sh/parse.c @@ -1456,7 +1456,7 @@ static Shnode_t *simple(Lex_t *lexp,int flag, struct ionod *io) lexp->intypeset = 1; key_on = 1; } - else if(np==SYSCOMMAND) + else if(np==SYSCOMMAND && !sh_isoption(SH_POSIX)) cmdarg++; else if(np==SYSEXEC) lexp->inexec = 1; diff --git a/src/cmd/ksh93/sh/xec.c b/src/cmd/ksh93/sh/xec.c index 122a1db8e7bf..81a35020e202 100644 --- a/src/cmd/ksh93/sh/xec.c +++ b/src/cmd/ksh93/sh/xec.c @@ -1011,7 +1011,7 @@ int sh_exec(register const Shnode_t *t, int flags) #endif /* SHOPT_NAMESPACE */ com0 = com[0]; shp->xargexit = 0; - while(np==SYSCOMMAND) + while(np==SYSCOMMAND || !np && com0 && nv_search(com0,shp->fun_tree,0)==SYSCOMMAND) { register int n = b_command(0,com,&shp->bltindata); if(n==0) diff --git a/src/cmd/ksh93/tests/builtins.sh b/src/cmd/ksh93/tests/builtins.sh index c53987452821..ac4a570e2c3c 100755 --- a/src/cmd/ksh93/tests/builtins.sh +++ b/src/cmd/ksh93/tests/builtins.sh @@ -514,6 +514,27 @@ function longline do print argument$i done } +# test that 'command' can result from expansion and can be overridden by a function +expect=all\ ok +c=command +actual=$("$c" echo all ok 2>&1) +[[ $actual == "$expect" ]] || err_exit '"command" utility from "$c" expansion not executed' \ + "(expected $(printf %q "$expect"), got $(printf %q "$actual"))" +set -- command echo all ok +actual=$("$@" 2>&1) +[[ $actual == "$expect" ]] || err_exit '"command" utility from "$@" expansion not executed' \ + "(expected $(printf %q "$expect"), got $(printf %q "$actual"))" +# must use 'eval' in following test as 'command' is integrated in parser and ksh likes to parse ahead +actual=$(function command { echo all ok; }; eval 'command echo all wrong') +[[ $actual == "$expect" ]] || err_exit '"command" failed to be overridden by function' \ + "(expected $(printf %q "$expect"), got $(printf %q "$actual"))" +actual=$(function command { echo all ok; }; "$c" echo all wrong) +[[ $actual == "$expect" ]] || err_exit '"command" from "$c" expansion failed to be overridden by function' \ + "(expected $(printf %q "$expect"), got $(printf %q "$actual"))" +set -- command echo all wrong +actual=$(function command { echo all ok; }; "$@") +[[ $actual == "$expect" ]] || err_exit '"command" from "$@" expansion failed to be overridden by function' \ + "(expected $(printf %q "$expect"), got $(printf %q "$actual"))" # test command -x option integer sum=0 n=10000 if ! ${SHELL:-ksh} -c 'print $#' count $(longline $n) > /dev/null 2>&1