Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Word splitting: filter out empty fields #15

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/command.lem
Original file line number Diff line number Diff line change
Expand Up @@ -1564,7 +1564,7 @@ let builtin_read s0 argv env =
let trimmed = trimr_one_newline line in
let w = [ExpS trimmed] in
let ifs = field_splitting s1 w in
let f = finalize_fields (combine_fields ifs) in
let f = finalize_fields (combine_fields ifs) (Just (length vars)) in
let ec = if read_eof hit_eof then 1 else 0 in
read_assign_vars (exit_with ec s1) vars f
end
Expand Down
98 changes: 59 additions & 39 deletions src/fields.lem
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@ val split_fields : forall 'a. OS 'a => os_state 'a -> expanded_words -> intermed
val clean_fields : intermediate_fields -> intermediate_fields
val skip_field_splitting : expanded_words -> intermediate_fields
val field_splitting : forall 'a. OS 'a => os_state 'a -> expanded_words -> intermediate_fields

let is_ws c = elem c (toCharList " \n\t")


let default_ifs = toCharList " \t\n"

let is_ws c = elem c default_ifs

let rec collect_non_ifs ifs ls =
match ls with
| [] -> ([], [])
Expand All @@ -46,19 +48,26 @@ let rec split_expstring ifs clst =
| [] -> []
| c::cs ->
if elem c ifs
then (if is_ws c then WFS else FS)::split_expstring ifs cs
then (if is_ws c then WFS [c] else FS [c])::split_expstring ifs cs
else let (cc, cs1) = collect_non_ifs ifs cs in
(Field (symbolic_string_of_char_list (c::cc)))::(split_expstring ifs cs1)
end

let rec consume_separators p a =
match p with
| UsrF::wrds -> consume_separators wrds (WFS []::a)
| _ -> (p, a)
end

let rec split_word ifs p =
match p with
| (f, []) -> f
| (f, UsrF::UsrF::wrds) -> split_word ifs (f, UsrF::wrds)
| (f, UsrF::wrds) -> split_word ifs (f ++ [FS], wrds)
| (f, UsrF::wrds) -> split_word ifs (f ++ [FS []], wrds)
| (f, ExpS(s)::wrds) ->
let new_fields = split_expstring ifs (toCharList s) in
split_word ifs (f ++ new_fields, wrds)
let (wrds', separators) = consume_separators wrds [] in
split_word ifs (f ++ new_fields ++ separators, wrds')
| (f, UsrS(s)::wrds) -> split_word ifs (f ++ [Field (symbolic_string_of_string s)], wrds)
| (f, At(fs)::wrds) -> split_word ifs (f ++ map Field fs, wrds)
| (f, DQuo(ss)::wrds) -> split_word ifs (f ++ [QField ss], wrds)
Expand Down Expand Up @@ -102,25 +111,25 @@ let rec skip_field_splitting w =
these are distinct from empty fields, which ought to have `UsrS
""` or `ExpS ""` or `DQuo ...` in them. *)
| UsrF::UsrF::ws -> skip_field_splitting (UsrF::ws)
| UsrF::ws -> FS::skip_field_splitting ws
| UsrF::ws -> FS []::skip_field_splitting ws
| UsrS s::ws -> Field (symbolic_string_of_string s)::skip_field_splitting ws
| ExpS s::ws -> Field (symbolic_string_of_string s)::skip_field_splitting ws
| DQuo s::ws -> QField s::skip_field_splitting ws
| At fs::ws -> intersperse FS (List.map QField fs) ++ skip_field_splitting ws
| At fs::ws -> intersperse (FS []) (List.map QField fs) ++ skip_field_splitting ws
| EWSym sym::ws -> Field [Sym sym]::skip_field_splitting ws
end

let split_fields s0 exp_words =
let ifs = lookup_string_param s0 "IFS" in
match ifs with
| Nothing -> split_word (toCharList " \n\t") ([], exp_words)
| Nothing -> split_word default_ifs ([], exp_words)
| Just fs ->
match try_concrete fs with
| Nothing ->
(* TODO 2018-08-29 unsoundly using default IFS. symbolic IFS
is basically impossible. should record this
unsoundness. *)
split_word (toCharList " \n\t") ([], exp_words)
split_word default_ifs ([], exp_words)
| Just "" ->
(* If the value of IFS is null, no field splitting shall be performed. *)
skip_field_splitting exp_words
Expand All @@ -131,28 +140,28 @@ let split_fields s0 exp_words =
let rec combine_fields f =
match f with
| [] -> []
| [WFS] -> [] (* Remove trailing field separators *)
| (WFS::WFS::rst) -> combine_fields (WFS::rst) (* Combine adjacent whitespace separators *)
| (WFS::FS::rst) -> combine_fields (FS::rst)
| (FS::WFS::rst) -> combine_fields (FS::rst)
| [WFS _] -> [] (* Remove trailing field separators *)
| (WFS f::WFS s::rst) -> combine_fields (WFS (f ++ s)::rst) (* Combine adjacent whitespace separators *)
| (WFS f::FS s::rst) -> combine_fields (FS (f ++ s)::rst)
| (FS f::WFS s::rst) -> combine_fields (FS (f ++ s)::rst)
| (Field(s1)::Field(s2)::rst) -> combine_fields (Field(s1 ++ s2)::rst)
| (QField(s1)::QField(s2)::rst) -> combine_fields (QField(s1 ++ s2)::rst)
| (QField(s1)::Field(s2)::rst) -> combine_fields (Field((escape_patterns s1) ++ s2)::rst)
| (Field(s1)::QField(s2)::rst) -> combine_fields (Field(s1 ++ (escape_patterns s2))::rst)
| (WFS::rst) -> FS::(combine_fields rst)
| (WFS f::rst) -> FS f::(combine_fields rst)
| (f::rst) -> f::(combine_fields rst)
end

let rec clean_fields f =
match f with
| (WFS::rst) -> clean_fields rst
| (WFS _::rst) -> clean_fields rst
| _ -> combine_fields f
end

let debug_tmp_field tf =
match tf with
| WFS -> "WFS"
| FS -> "FS"
| WFS c -> "WFS(" ^ toString c ^ ")"
| FS c -> "FS(" ^ toString c ^ ")"
| Field s -> "Field(" ^ string_of_symbolic_string s ^ ")"
| QField s -> "QField(" ^ string_of_symbolic_string s ^ ")"
end
Expand All @@ -170,7 +179,7 @@ let rec insert_field_separators fs =
match fs with
| [] -> []
| [f] -> [Field (symbolic_string_of_string f)]
| f::fs' -> Field (symbolic_string_of_string f)::FS::insert_field_separators fs'
| f::fs' -> Field (symbolic_string_of_string f)::FS []::insert_field_separators fs'
end

val pathname_expansion : forall 'a. OS 'a => os_state 'a -> intermediate_fields -> intermediate_fields
Expand Down Expand Up @@ -216,8 +225,10 @@ let rec pathname_expansion s0 f =
*)

val remove_quotes : intermediate_fields -> intermediate_fields
val to_fields : intermediate_fields -> fields
val finalize_fields : intermediate_fields -> fields
val to_single_field : list (symbolic_string * symbolic_string) -> symbolic_string
val to_fields : list (symbolic_string * symbolic_string) -> maybe nat -> fields
val normalize_fields : intermediate_fields -> list (symbolic_string * symbolic_string)
val finalize_fields : intermediate_fields -> maybe nat -> fields
val quote_removal : intermediate_fields -> fields

let rec remove_quotes f =
Expand All @@ -227,26 +238,35 @@ let rec remove_quotes f =
| f :: rst -> f::remove_quotes rst
end

let rec to_fields f =
match f with
| [] -> []
| Field fs::rst -> fs::(to_fields rst)
| FS::FS::rst -> (symbolic_string_of_string "")::(to_fields (FS::rst))
| FS::rst -> to_fields rst
(* TODO 2017-12-01 Preferably we need an AST that doesn't have WFS or QField's at this point, or lem failure *)
| WFS::_ ->
Assert_extra.failwith "broken invariant in to_fields: didn't expect WFS"
| QField _::_ ->
Assert_extra.failwith "broken invariant in to_fields: didn't expect QField"
end

let rec finalize_fields f =
match f with
| (FS::rst) -> symbolic_string_of_string ""::finalize_fields rst
| _ -> to_fields f
let rec to_single_field f =
let extract (f, s) = f ++ s in
concatMap extract f

let rec to_fields f l =
match l with
| Just 1 -> [to_single_field f]
| _ ->
match f with
| [] -> []
| (f, _)::rst -> f::to_fields rst (bind l (fun n -> Just (n - 1)))
end
end

let rec normalize_fields fs =
let ssocl = symbolic_string_of_char_list in
match fs with
| [] -> []
| Field f::FS x::rst -> (f, ssocl x)::normalize_fields rst
| FS x::rst -> (ssocl [], ssocl x)::normalize_fields rst
| Field f::rst -> (f, ssocl [])::normalize_fields rst
| WFS _::_ -> Assert_extra.failwith "broken invariant in normalize_fields: didn't expect WFS"
| QField _::_ -> Assert_extra.failwith "broken invariant in normalize_fields: didn't expect QField"
end

let rec finalize_fields f l =
to_fields (normalize_fields f) l

let quote_removal f =
let no_quotes = combine_fields (remove_quotes f) in
finalize_fields no_quotes
finalize_fields no_quotes Nothing

4 changes: 2 additions & 2 deletions src/shim.ml
Original file line number Diff line number Diff line change
Expand Up @@ -721,8 +721,8 @@ and list_of_symbolic_string = function
String (implode cs)::list_of_symbolic_string s'
| Sym sym::s' -> json_of_symbolic sym::list_of_symbolic_string s'
and json_of_tmp_field = function
| WFS -> Assoc [tag "WFS"]
| FS -> Assoc [tag "FS"]
| WFS cl -> Assoc [tag "WFS"; ("s", String (implode cl))]
| FS cl -> Assoc [tag "FS"; ("s", String (implode cl))]
| Field s -> Assoc [tag "Field"; ("s", json_of_symbolic_string s)]
| QField s -> Assoc [tag "QField"; ("s", json_of_symbolic_string s)]
and json_of_intermediate_fields fs = List (List.map json_of_tmp_field fs)
Expand Down
20 changes: 10 additions & 10 deletions src/smoosh_prelude.lem
Original file line number Diff line number Diff line change
Expand Up @@ -1002,8 +1002,8 @@ and symbolic_string = list symbolic_char
and fields = list (symbolic_string)

and tmp_field =
WFS
| FS
WFS of list char
| FS of list char
| Field of symbolic_string
| QField of symbolic_string

Expand Down Expand Up @@ -1379,8 +1379,8 @@ let unescape_pattern = unescape_chars pattern_escs
val unescape_tmp_field : tmp_field -> tmp_field
let unescape_tmp_field tf =
match tf with
| WFS -> WFS
| FS -> FS
| WFS _ -> tf
| FS _ -> tf
| Field ss -> Field (unescape_pattern ss)
| QField ss -> QField ss
end
Expand All @@ -1391,8 +1391,8 @@ let unescape_intermediate_fields = List.map unescape_tmp_field
val unescape_heredoc_field : tmp_field -> tmp_field
let unescape_heredoc_field tf =
match tf with
| WFS -> WFS
| FS -> FS
| WFS _ -> tf
| FS _ -> tf
| Field ss -> Field (unescape_chars heredoc_escs ss)
| QField ss -> QField (unescape_chars heredoc_escs ss)
end
Expand Down Expand Up @@ -1427,8 +1427,8 @@ val symbolic_string_of_intermediate_fields : intermediate_fields -> symbolic_str
let rec symbolic_string_of_intermediate_fields ifs =
match ifs with
| [] -> []
| FS::ifs' -> C #' '::symbolic_string_of_intermediate_fields ifs'
| WFS::ifs' -> C #' '::symbolic_string_of_intermediate_fields ifs'
| FS _::ifs' -> C #' '::symbolic_string_of_intermediate_fields ifs'
| WFS _::ifs' -> C #' '::symbolic_string_of_intermediate_fields ifs'
| Field s::ifs' -> s ++ symbolic_string_of_intermediate_fields ifs'
| QField s::ifs' -> [C #'\"'] ++ s ++ [C #'\"'] ++
symbolic_string_of_intermediate_fields ifs'
Expand Down Expand Up @@ -1955,8 +1955,8 @@ and string_of_redir_state (ers, exp_redir, rs) =

and string_of_tmp_field tf =
match tf with
| WFS -> " "
| FS -> " "
| WFS cl -> if cl = [] then " " else toString cl
| FS cl -> if cl = [] then " " else toString cl
| Field s -> string_of_symbolic_string s
| QField s -> "\"" ^ string_of_symbolic_string s ^ "\""
end
Expand Down
10 changes: 10 additions & 0 deletions tests/shell/builtin.read.last.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
1
:::
2

2

3

2
:::::::::
21 changes: 21 additions & 0 deletions tests/shell/builtin.read.last.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
echo 1
IFS=':' read foo <<EOF
:::
EOF
echo $foo

echo 2
IFS=':' read foo bar baz <<EOF
:2:
EOF
echo $foo
echo $bar
echo $baz

echo 3
IFS=':' read foo bar baz <<EOF
:2::::::::::
EOF
echo $foo
echo $bar
echo $baz
16 changes: 16 additions & 0 deletions tests/shell/semantics.splitting.empty.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Default:
0
0
4
4
Colon:
0
4
4
4
Unset:
0
0
4
4
OK
33 changes: 33 additions & 0 deletions tests/shell/semantics.splitting.empty.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
arg_len() {
echo "$#"
}

echo Default:
# Default IFS
IFS='
'

arg_len $(echo) $(echo) $(echo) $(echo)
arg_len $(echo " ") $(echo " ") $(echo " ") $(echo " ")
arg_len "$(echo)" "$(echo)" "$(echo)" "$(echo)"
arg_len "$(echo " ")" "$(echo " ")" "$(echo " ")" "$(echo " ")"

echo Colon:
# Set IFS.
IFS=":"

arg_len $(echo) $(echo) $(echo) $(echo)
arg_len $(echo " ") $(echo " ") $(echo " ") $(echo " ")
arg_len "$(echo)" "$(echo)" "$(echo)" "$(echo)"
arg_len "$(echo " ")" "$(echo " ")" "$(echo " ")" "$(echo " ")"

echo Unset:
# Unset IFS is the same as the default.
unset IFS

arg_len $(echo) $(echo) $(echo) $(echo)
arg_len $(echo " ") $(echo " ") $(echo " ") $(echo " ")
arg_len "$(echo)" "$(echo)" "$(echo)" "$(echo)"
arg_len "$(echo " ")" "$(echo " ")" "$(echo " ")" "$(echo " ")"

$(echo) echo OK
5 changes: 5 additions & 0 deletions tests/shell/semantics.splitting.ifs2.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
OK 1
OK 2a
OK 2b
OK 3a
OK 3b
12 changes: 12 additions & 0 deletions tests/shell/semantics.splitting.ifs2.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
arg_len() {
echo "$#"
}

IFS=":"
FOO=":"

[ "$(arg_len $FOO)" -eq 1 ] && echo OK 1
[ "$(arg_len $FOO $FOO)" -eq 2 ] && echo OK 2a
[ "$(arg_len $FOO$FOO)" -eq 2 ] && echo OK 2b
[ "$(arg_len $FOO $FOO $FOO)" -eq 3 ] && echo OK 3a
[ "$(arg_len $FOO$FOO$FOO)" -eq 3 ] && echo OK 3b