From ec9ca40825cff944cbc601be55411eb6fd14b313 Mon Sep 17 00:00:00 2001 From: Muh Muhten Date: Mon, 25 Feb 2019 23:53:21 -0500 Subject: [PATCH 1/9] Remove scalars_or_empty --- src/builtin.jq | 1 - tests/shtest | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/builtin.jq b/src/builtin.jq index d3a4bb16d1..23c5a5647c 100644 --- a/src/builtin.jq +++ b/src/builtin.jq @@ -58,7 +58,6 @@ def strings: select(type == "string"); def nulls: select(type == "null"); def values: select(. != null); def scalars: select(. == null or . == true or . == false or type == "number" or type == "string"); -def scalars_or_empty: select(. == null or . == true or . == false or type == "number" or type == "string" or ((type=="array" or type=="object") and length==0)); def leaf_paths: paths(scalars); def join($x): reduce .[] as $i (null; (if .==null then "" else .+$x end) + diff --git a/tests/shtest b/tests/shtest index fc2ef15230..a4fec6d6af 100755 --- a/tests/shtest +++ b/tests/shtest @@ -155,7 +155,7 @@ cmp $d/out $d/expected ## If we add an option to stream to the `import ... as $symbol;` directive ## then we can move these tests into tests/all.test. -$VALGRIND $Q $JQ -c '. as $d|path(..) as $p|$d|getpath($p)|scalars_or_empty|[$p,.]' < "$JQTESTDIR/torture/input0.json" > $d/out0 +$VALGRIND $Q $JQ -c '. as $d|path(..) as $p|$d|getpath($p)|select((type|. != "array" and . != "object") or length==0)|[$p,.]' < "$JQTESTDIR/torture/input0.json" > $d/out0 $VALGRIND $Q $JQ --stream -c '.|select(length==2)' < "$JQTESTDIR/torture/input0.json" > $d/out1 diff $d/out0 $d/out1 From e59a8c3ab5bdee156a67aae526316e794cf195ad Mon Sep 17 00:00:00 2001 From: Muh Muhten Date: Tue, 26 Feb 2019 05:18:09 -0500 Subject: [PATCH 2/9] Redefine isempty/1, and the all and any series Fixes the bug where all and any evaluate one more item than needed. --- src/builtin.jq | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/builtin.jq b/src/builtin.jq index 23c5a5647c..e3d0b7ffe5 100644 --- a/src/builtin.jq +++ b/src/builtin.jq @@ -32,20 +32,6 @@ def index($i): indices($i) | .[0]; # TODO: optimize def rindex($i): indices($i) | .[-1:][0]; # TODO: optimize def paths: path(recurse(if (type|. == "array" or . == "object") then .[] else empty end))|select(length > 0); def paths(node_filter): . as $dot|paths|select(. as $p|$dot|getpath($p)|node_filter); -def any(generator; condition): - [label $out | foreach generator as $i - (false; - if . then break $out elif $i | condition then true else . end; - if . then . else empty end)] | length == 1; -def any(condition): any(.[]; condition); -def any: any(.); -def all(generator; condition): - [label $out | foreach generator as $i - (true; - if .|not then break $out elif $i | condition then . else false end; - if .|not then . else empty end)] | length == 0; -def all(condition): all(.[]; condition); -def all: all(.); def isfinite: type == "number" and (isinfinite | not); def arrays: select(type == "array"); def objects: select(type == "object"); @@ -170,8 +156,14 @@ def limit($n; exp): if $n > 0 then label $out | foreach exp as $item ($n; .-1; $item, if . <= 0 then break $out else empty end) elif $n == 0 then empty else exp end; -def isempty(g): 0 == ((label $go | g | (1, break $go)) // 0); def first(g): label $out | g | ., break $out; +def isempty(g): first((g|false), true); +def all(generator; condition): isempty(generator|condition and empty); +def any(generator; condition): isempty(generator|condition or empty)|not; +def all(condition): all(.[]; condition); +def any(condition): any(.[]; condition); +def all: all(.[]; .); +def any: any(.[]; .); def last(g): reduce g as $item (null; $item); def nth($n; g): if $n < 0 then error("nth doesn't support negative indices") else last(limit($n + 1; g)) end; def first: .[0]; From ebae48a4ff5dbdd0bf05e6f8a5aa71666e5e401d Mon Sep 17 00:00:00 2001 From: Muh Muhten Date: Sat, 23 Feb 2019 19:47:51 -0500 Subject: [PATCH 3/9] Add more tests for any and all --- tests/jq.test | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/jq.test b/tests/jq.test index f330fb0f6d..539ee7d832 100644 --- a/tests/jq.test +++ b/tests/jq.test @@ -851,6 +851,51 @@ false [1,2,3,4,true] true +# Check short-circuiting +any(true, error; .) +"badness" +true + +all(false, error; .) +"badness" +false + +any(not) +[] +false + +all(not) +[] +true + +any(not) +[false] +true + +all(not) +[false] +true + +[any,all] +[] +[false,true] + +[any,all] +[true] +[true,true] + +[any,all] +[false] +[false,false] + +[any,all] +[true,false] +[true,false] + +[any,all] +[null,null,true] +[true,false] + # # Paths # From 2c26fc4c49f0efece7bb9d946d9800e1638e2fd8 Mon Sep 17 00:00:00 2001 From: Muh Muhten Date: Tue, 26 Feb 2019 05:21:31 -0500 Subject: [PATCH 4/9] Simplify type-select builtins --- src/builtin.jq | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/builtin.jq b/src/builtin.jq index e3d0b7ffe5..2c7bebeb89 100644 --- a/src/builtin.jq +++ b/src/builtin.jq @@ -35,15 +35,15 @@ def paths(node_filter): . as $dot|paths|select(. as $p|$dot|getpath($p)|node_fil def isfinite: type == "number" and (isinfinite | not); def arrays: select(type == "array"); def objects: select(type == "object"); -def iterables: arrays, objects; +def iterables: select(type|. == "array" or . == "object"); def booleans: select(type == "boolean"); def numbers: select(type == "number"); def normals: select(isnormal); def finites: select(isfinite); def strings: select(type == "string"); -def nulls: select(type == "null"); +def nulls: select(. == null); def values: select(. != null); -def scalars: select(. == null or . == true or . == false or type == "number" or type == "string"); +def scalars: select(type|. != "array" and . != "object"); def leaf_paths: paths(scalars); def join($x): reduce .[] as $i (null; (if .==null then "" else .+$x end) + @@ -195,7 +195,7 @@ def repeat(exp): def _repeat: exp, _repeat; _repeat; -def inputs: try repeat(input) catch if .=="break" then empty else .|error end; +def inputs: try repeat(input) catch if .=="break" then empty else error end; # like ruby's downcase - only characters A to Z are affected def ascii_downcase: explode | map( if 65 <= . and . <= 90 then . + 32 else . end) | implode; From bbd54abeae2096eb251ec09e8d2ab5fbea2d9586 Mon Sep 17 00:00:00 2001 From: Muh Muhten Date: Tue, 26 Feb 2019 05:22:18 -0500 Subject: [PATCH 5/9] Simplify definition of range/3 New implementation in terms of while/2, and branches immediately on $by to avoid checking the sign of $by *in* the loop. --- src/builtin.jq | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/builtin.jq b/src/builtin.jq index 2c7bebeb89..6b092bc04d 100644 --- a/src/builtin.jq +++ b/src/builtin.jq @@ -138,11 +138,6 @@ def gsub($re; s; flags): sub($re; s; flags + "g"); def gsub($re; s): sub($re; s; "g"); ######################################################################## -# range/3, with a `by` expression argument -def range($init; $upto; $by): - def _range: - if ($by > 0 and . < $upto) or ($by < 0 and . > $upto) then ., ((.+$by)|_range) else . end; - if $by == 0 then $init else $init|_range end | select(($by > 0 and . < $upto) or ($by < 0 and . > $upto)); # generic iterator/generator def while(cond; update): def _while: @@ -156,6 +151,11 @@ def limit($n; exp): if $n > 0 then label $out | foreach exp as $item ($n; .-1; $item, if . <= 0 then break $out else empty end) elif $n == 0 then empty else exp end; +# range/3, with a `by` expression argument +def range($init; $upto; $by): + if $by > 0 then $init|while(. < $upto; . + $by) + elif $by < 0 then $init|while(. > $upto; . + $by) + else empty end; def first(g): label $out | g | ., break $out; def isempty(g): first((g|false), true); def all(generator; condition): isempty(generator|condition and empty); From 26892766461bfdbab469d76bc5b313cf8e176a2d Mon Sep 17 00:00:00 2001 From: Muh Muhten Date: Tue, 26 Feb 2019 05:33:36 -0500 Subject: [PATCH 6/9] Reimplement fromstream/1 more compactly --- src/builtin.jq | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/src/builtin.jq b/src/builtin.jq index 6b092bc04d..7916d7d8f8 100644 --- a/src/builtin.jq +++ b/src/builtin.jq @@ -206,26 +206,14 @@ def ascii_upcase: # Streaming utilities def truncate_stream(stream): . as $n | null | stream | . as $input | if (.[0]|length) > $n then setpath([0];$input[0][$n:]) else empty end; -def fromstream(i): - foreach i as $i ( - [null, null]; - - if ($i | length) == 2 then - if ($i[0] | length) == 0 then . - else [ ( .[0] | setpath($i[0]; $i[1]) ), .[1] ] - end - elif ($i[0] | length) == 1 then [ null, .[0] ] - else . - end; - - if ($i | length) == 1 then - if ($i[0] | length) == 1 then .[1] - else empty - end - elif ($i[0] | length) == 0 then $i[1] - else empty - end - ); +def fromstream(i): {x: null, e: false} as $init | + # .x = object being built; .e = emit and reset state + foreach i as $i ($init + ; if .e then $init else . end + | if $i|length == 2 + then setpath(["e"]; $i[0]|length==0) | setpath(["x"]+$i[0]; $i[1]) + else setpath(["e"]; $i[0]|length==1) end + ; if .e then .x else empty end); def tostream: {string:true,number:true,boolean:true,null:true} as $leaf_types | . as $dot | From 97834e1929b6567ee2ebee56a04251f6b21906a6 Mon Sep 17 00:00:00 2001 From: Muh Muhten Date: Tue, 26 Feb 2019 05:35:59 -0500 Subject: [PATCH 7/9] Make tostream much more efficient (`path(.[])` is a streaming `keys`!) --- src/builtin.jq | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/src/builtin.jq b/src/builtin.jq index 7916d7d8f8..f45802f6a1 100644 --- a/src/builtin.jq +++ b/src/builtin.jq @@ -215,22 +215,9 @@ def fromstream(i): {x: null, e: false} as $init | else setpath(["e"]; $i[0]|length==1) end ; if .e then .x else empty end); def tostream: - {string:true,number:true,boolean:true,null:true} as $leaf_types | - . as $dot | - if $leaf_types[$dot|type] or length==0 then [[],$dot] - else - # We really need a _streaming_ form of `keys`. - # We can use `range` for arrays, but not for objects. - keys_unsorted as $keys | - $keys[-1] as $last| - ((# for each key - $keys[] | . as $key | - $dot[$key] | . as $dot | - # recurse on each key/value - tostream|.[0]|=[$key]+.), - # then add the closing marker - [[$last]]) - end; + path(def r: (.[]?|r), .; r) as $p | + getpath($p) | + reduce path(.[]?) as $q ([$p, .]; [$p+$q]); # Assuming the input array is sorted, bsearch/1 returns From c6a981e73a01af5b9cde5761a2e8eef5fbe6ec6a Mon Sep 17 00:00:00 2001 From: Muh Muhten Date: Tue, 26 Feb 2019 05:40:01 -0500 Subject: [PATCH 8/9] Fix bizarre bsearch/1 behaviour with a stream argument --- src/builtin.jq | 10 +++++----- tests/jq.test | 4 +++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/builtin.jq b/src/builtin.jq index f45802f6a1..321358c6e2 100644 --- a/src/builtin.jq +++ b/src/builtin.jq @@ -224,10 +224,10 @@ def tostream: # the index of the target if the target is in the input array; and otherwise # (-1 - ix), where ix is the insertion point that would leave the array sorted. # If the input is not sorted, bsearch will terminate but with irrelevant results. -def bsearch(target): +def bsearch($target): if length == 0 then -1 elif length == 1 then - if target == .[0] then 0 elif target < .[0] then -1 else -2 end + if $target == .[0] then 0 elif $target < .[0] then -1 else -2 end else . as $in # state variable: [start, end, answer] # where start and end are the upper and lower offsets to use. @@ -237,14 +237,14 @@ def bsearch(target): else ( ( (.[1] + .[0]) / 2 ) | floor ) as $mid | $in[$mid] as $monkey - | if $monkey == target then (.[2] = $mid) # success + | if $monkey == $target then (.[2] = $mid) # success elif .[0] == .[1] then (.[1] = -1) # failure - elif $monkey < target then (.[0] = ($mid + 1)) + elif $monkey < $target then (.[0] = ($mid + 1)) else (.[1] = ($mid - 1)) end end ) | if .[2] == null then # compute the insertion point - if $in[ .[0] ] < target then (-2 -.[0]) + if $in[ .[0] ] < $target then (-2 -.[0]) else (-1 -.[0]) end else .[2] diff --git a/tests/jq.test b/tests/jq.test index 539ee7d832..b85f897d93 100644 --- a/tests/jq.test +++ b/tests/jq.test @@ -1427,8 +1427,10 @@ ascii_upcase "useful but not for é" "USEFUL BUT NOT FOR é" -bsearch(4) +bsearch(0,2,4) [1,2,3] +-1 +1 -4 # strptime tests are in optional.test From 93ff9caede106a6174f0766303f095fa068e846c Mon Sep 17 00:00:00 2001 From: Muh Muhten Date: Tue, 26 Feb 2019 05:54:58 -0500 Subject: [PATCH 9/9] Simplify and optimize SQLish builtins --- src/builtin.jq | 10 +++------- tests/jq.test | 11 +++++++++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/builtin.jq b/src/builtin.jq index 321358c6e2..ee78017609 100644 --- a/src/builtin.jq +++ b/src/builtin.jq @@ -263,11 +263,7 @@ def walk(f): # SQL-ish operators here: def INDEX(stream; idx_expr): - reduce stream as $row ({}; - .[$row|idx_expr| - if type != "string" then tojson - else . - end] |= $row); + reduce stream as $row ({}; .[$row|idx_expr|tostring] = $row); def INDEX(idx_expr): INDEX(.[]; idx_expr); def JOIN($idx; idx_expr): [.[] | [., $idx[idx_expr]]]; @@ -275,5 +271,5 @@ def JOIN($idx; stream; idx_expr): stream | [., $idx[idx_expr]]; def JOIN($idx; stream; idx_expr; join_expr): stream | [., $idx[idx_expr]] | join_expr; -def IN(s): reduce (first(select(. == s)) | true) as $v (false; if . or $v then true else false end); -def IN(src; s): reduce (src|IN(s)) as $v (false; if . or $v then true else false end); +def IN(s): any(s == .; .); +def IN(src; s): any(src == s; .); diff --git a/tests/jq.test b/tests/jq.test index b85f897d93..28b325f997 100644 --- a/tests/jq.test +++ b/tests/jq.test @@ -1575,6 +1575,17 @@ true true true +range(5;13)|IN(range(0;10;3)) +null +false +true +false +false +true +false +false +false + range(10;12)|IN(range(10)) null false