From c44941ca7d2cc196516eb16ab9748b7c4e53067e Mon Sep 17 00:00:00 2001 From: xrstf Date: Thu, 28 Dec 2023 13:19:40 +0100 Subject: [PATCH] make (delete) and (set) work with tuples/vectors/node, change (set) behaviour to return the entire datastructure (closes #2) --- cmd/rudi/docs/data/functions/delete.md.gz | Bin 687 -> 992 bytes cmd/rudi/docs/data/functions/set.md.gz | Bin 1118 -> 1383 bytes docs/stdlib/core/delete.md | 38 ++-- docs/stdlib/core/set.md | 56 +++--- pkg/builtin/core/docs/delete.md | 38 ++-- pkg/builtin/core/docs/set.md | 56 +++--- pkg/builtin/core/functions.go | 226 ++++++++++------------ pkg/builtin/core/functions_test.go | 57 ++++-- pkg/lang/ast/types.go | 74 ++++++- pkg/lang/ast/types_test.go | 4 +- 10 files changed, 300 insertions(+), 249 deletions(-) diff --git a/cmd/rudi/docs/data/functions/delete.md.gz b/cmd/rudi/docs/data/functions/delete.md.gz index 7021cef741ddf5b8876998b2a327c5f2033f8e0b..a6ad70c8f3b20759946debc2cc06211de065a94e 100644 GIT binary patch literal 992 zcmV<610Vb!iwFP!00000|EyNQj@w2LzUL|Ud;fzCI7-|tx`A#zv_LOC6btkanQ|Q=@VBNp>5Hs z$FI-~vx;(M=Gp{qBRmeUui8scW+;#86qVOI^o(mq;Z3etm;P`QmeaJ2Q=Wjjx=H7rFn^t? z6kINnErzgHpHFxMQz*&6a}HVnOU^sa8+1|V;h<-Sz9bJh8bUDNWY%~};8uYo0zX?W zt0E~ER0Z80+W?-d@e(>F9(|Cfo@!)aDaTARI<5A@*S{dv1XNtx9Ty;w z1Ug;5LOq1GUaJc*=Y$eCN~P0qCs?OzbZ}74VhMpI>%e-7x)g<3(8gypi)=^jZF4lr zp-_K%{={+N*vMl%xT2Wv%uU~~;0L(=@P74SdRLCEw?oJIONPansT|0jrbp3u%VV$i zjB|imgMk)C()Zb(t$NB{S3-0_N3uIVmBrZ8Dan0|BRa+k?Oh8jfqblQC&O>M``erJ z>I#1Og!R@@IQ7Ag>BHB5pVi@p+Qm5H1#|gQ&=*5Wrc(L7aNvA@$G$sCWl)K0pHs*Z z){S)&yW56b$G(e#=?8exxGqo|11`?Urq!nJ_C1Ql1{{{n(LcNyY{85L%%cRh*^5eY zu_`hZiH7C4`$rps!_RrmvY3(+Hw5pd1K^Nr{4GMC?O=`0qa6nhpC5`j9{=&U0o>%c z!eRN%M&xYQ{i?n`*>dmFtE;Ose#+viv^07#8ZX;-DLnl1_arR3xl8%$Q%_0|$Kccx zc4B_w2VYdEmzZOUa}JIA`8l9`>_huaB}W&%uh3{mzWaXaS~0PP{%>w0)A<% zTS3$)c@lvM7Q(Phg20NJtt&fdMcBHQ!et>$+%`!GSQn{K-(`5ksLhVhaT8HgCTvOY zT+|i2=PNWq{dYGeY=?Io#e72NX5jM`zHUv-O6KK++Wq@KE<)1E2{`z!hanIOLMS;x zx_ZN2aN{z{(3httfu*;oxE?s96+TDuv_?i-XhZv4odd%oWPZe6hffcc$KZlDP}t=J)UazG}K1 zb<2klPoUgKL&~OU|FUp!O~;=&*0UmhpDz}RdBji?x1{+{am$BMxc}?VNtg-UEPy#t zhEGq#NDdq8dvK7!-e`4JTAx)1pE7}{HNG)Q(z|y=c~;tEmkA$6RcaR0QtaSU1`T001o`Pt*Va diff --git a/cmd/rudi/docs/data/functions/set.md.gz b/cmd/rudi/docs/data/functions/set.md.gz index 289839a1292573830fd2983e01efffa2e2c7e11f..32a49918584ab78fe324cc1f6891c309a9349e64 100644 GIT binary patch literal 1383 zcmV-t1(^CDiwFP!00000|GZewj@z~qzUL_>-W=9|BkV7`_@(eIhX&21mlVAOIZ)!r zVnUGyhqBfP7RZAXJrs*R$UaJ*q!?1xN-`BYX;5DbNEG?azi+EZs0 zfB#2(QKQExJkJPgOCD_L(L}JV@>MuE7iI&s zm8w+e;Bvg-C82)`Uq1i69B;Z@fuQcILU?5j&4FD=3&wgNuZoWgu)adqD#8qYu`TY< zo%C-Z3n!+~@VWA9oCk!dSSj_;^>LY?Okg_yu2wg_#y)qjL z58;`_QZ(AX?Ve_eUddV+D1|0mPCT=MwnB%&waOTDP&TGu56yQvqK;8t`Y7Gv3<4iahjaLq#0}j1CHvfP{pIZY!r;}W0ZCire`U>n0UF#I@ zIXun_w?QA1Je^3-cETmHiEAJkja24w;#5{Lpzvzl1lhuGhc!wI1m~LcTxkZ-+pI-J%Q}VvXLjXO#pt zLTC-8<&dzcq;k#wc)N8-#MQV0GpMZzAdcBqxIiU+lpPZn|$qByMYW!-b&iI{O z8?o{zQ47pY(9=`!jVi%d*a}|_;svcB*r*+v7?DFX`g)&$2*h;hP{`+nHKp1tC#*Cj zr2(|~LE{2#Lu(r?nOUO|C|7~4L-ma94jYg6&C)^pt>SzjAgwl)*L#qvERpjK2y0=y zDhNtv>#i$tOXz8mm_b!hb;vqa3n|kj%3Q8Eq(+C@?^s=ER(&k@YXFecw=JH6B@$R$#iV3`$!Pl z+-Z4m@b)l=2MgrdVJ(zVW-~s7GkY}`33gsv2_GnkQ`=!}cPRgDMSit5uU@V%F5u@_ zIh0y(v;Ok=uO#t@NZyQ7;bskA{`_OI{N}}E$qzQ@8{tkf@IS-+O>na)Zr@$K&kp)_ zw#6nb7%cuiQ@+i+ks;xefW;nZ0eSqtTXuplzRv_Kj6DPWa35nAcc)F5&F~Ll%V+%Ne=m+u9*+KysmGC&F*(6{Vfjm7{7+cU(GT8ED6u3jbDWhm_c`Bi%D3GXZ zwBFB=_W`7gsv+VE0*}ynl%R~a;nhg+3m@lL7BLa)2z}Q;=R$bB?lx1@E0G`9EoQ@X zb-n)8dW@OLGPs}1S4p39^5zIwZ~Z>4F9Ja%x=9!vUU$+xDxMifM3o=iL9633fts~h zjv>EhF^t7woG6bWL5_uJD2%I)qde2jfo$W7&QHZbG9KnYj|MJ--Uzn}qW9v=<;L+4 zl@84-9{NomAKu(H{`l5n6`lHcs}fSLZ#8kWrZ7?k9r-AAXT@1#Bs#r+6qAaABz2_^Ya)4TZsyen&7rYfk0u<=2Kzj&sSc*e& zOUIm@6b`IoAl8fP2{^9MZ#3a9!lz%O z6QzuC8-m<%1#XRg<27r$3^1T&?Gae{C2~wu8jP1~^VaXUyLXT%J#-@_A6k+6MOmx%-P8SchQnMia|xfkMYBpmwBLuMu*u?4pwL z=SuNl2VD+!;HEZDu9DHU;gK6}DFhPPAAH;>662@2k)qU9c5(u*W5Ot9aGBkI`a8++ zm!)%(!@f1#|Mlm<{PKKY@?2LU%_H0mU;YoDzslWtoJ4QW-pzNz=x8rVM$j>STqx-d z^oQokHGDX;W@84lK?+m4Y|JAdeI!5SXZ+Oz@W1E5(Sm#9{3=;(KV~NM- z-8u-(3N5I*HX2D}AG=`i5XcwVr~uMs`Aa!s{g}j^=8f9ZI;a&_9T;bznc8qP0qIq3 zwtJz*Xy@>&zD5y`Gf;zV=jqwI(Z;|^T@O(>1a-F$cIDs^RsQIFM!?iCatT+yqveOY zSqLe}585l$0#UHCNQ}i!kMy-H&X*~SUR`DTHZ{+c9}d>thV2QaL%7(AuW<{LT=|LY zI?zmmK6jDLU{mQuCE-@CmS$|WpzHv!{yQ#zj|-m8;q@AA3{4X(l#rm$?z19`Dkge3 zNbiuR$uU7Ju^uc-$bHim)64T6*0smAb`2c~qE)8TSvU&G3k$c(gN_bMJ-vgZ$@Y_> zaQfP*x#Kfb)OhDy8v4n8{x2$JmvWl**j4dIbEYln0u#;W5cHsOrSQ>|Vbj32LX)a% zR35$$h#6GZ;2M@?AHeEp)g5|s(A?uOZd}i76|>M6y?0)yZE)9{wOvMf#%G#Zb1dc$ z_!{Q)&(1R5@FOOIGSXR^g81Uecv)X&SVP k6a|u4eqQ)n+C%vDgLQ}3-R;MJ0{{U3{~!h|n$-~i0FWLr(EtDd diff --git a/docs/stdlib/core/delete.md b/docs/stdlib/core/delete.md index ea9c38f..8ace198 100644 --- a/docs/stdlib/core/delete.md +++ b/docs/stdlib/core/delete.md @@ -1,37 +1,40 @@ # delete `delete` removes a key from an object or an item from a vector (shrinking that -vector by 1 element). - - +vector by 1 element). It must be used on any expression that has a path +expression attached (e.g. `(delete (do-something).gone)`). Note that `delete` is, like all functions in Rudi, stateless and so does not -modify the given argument directly. To make your changes "stick", use the bang -modifier: `(delete! $var.foo)` – this only makes sense for symbols (i.e. -variables and the global document), which is incidentally also what the bang -modifier itself already enforces. So an expression like `(delete! [1 2][1])` is -invalid. +modify the given argument directly. That is why `delete` can be used on +non-variables/documents, like in `(delete (read-config).password)` to get rid +of individual keys from a larger data structure. To enable this, `delete` always +returns the _remaining_ datastructure, not the value that was removed. + +To make your changes "stick", use the bang modifier: `(delete! $var.foo)` – this +only makes sense for symbols (i.e. variables and the global document), since +there is no "source" to be updated for e.g. literals (hence +`(delete! [1 2 3][1])` is invalid). [`set`](set.md) is another function that is most often used with the bang -modifier. +modifier and also returns the entire datastructure, not just the newly inserted +value. ## Examples * `(delete! $var.key)` * `(delete! .[1])` - +* `(connect (delete (read-config).isAdmin))` to give `connect` a config object + object without the `isAdmin` flag. ## Forms -### `(delete target:symbol)` ➜ `any` +### `(delete target:pathed)` ➜ `any` -* `target` is a symbol with a path expression. +* `target` is a any expression with a path expression. `delete` evaluates the target expression and then removes whatever the path expression is pointing to. The return value is the remaining data (i.e. not the @@ -39,8 +42,3 @@ removed value). When used with the bang modifier, `target` must be a symbol with a path expression. - -## Context - -`delete` evaluates the expression in its own scope, so variables defined in it -do not leak. diff --git a/docs/stdlib/core/set.md b/docs/stdlib/core/set.md index 07d7351..c71da52 100644 --- a/docs/stdlib/core/set.md +++ b/docs/stdlib/core/set.md @@ -2,7 +2,11 @@ `set` is used to define variables and to update values of variables or the global document. Similar to [`delete`](delete.md) it is most often used with the -bang modifier (`(set! …)`), as modifications "do not stick" otherwise. +bang modifier (`(set! …)`), as modifications "do not stick" otherwise. However +`set` can also be used to modify a datastructure "in transit", like in +`(set! .user.settings (set $defaultSettings.isAdmin false))`, where the inner +function will change a subfield (`isAdmin`) but still return the entire +default settings object. `set` allows both overwriting the entire target value (`(set! $var …)` or `(set! . …)`) as well as setting just a sub element (for example @@ -12,48 +16,40 @@ Variables defined by `set` are scoped and only valid for all following sibling expressions, never the parent. For for example `(if true (set! $x 42)) $x` is invalid, as `$x` only exists in the positive branch of that `if` tuple. -`set` returns the value that was set. +`set` returns the entire target data structure, as if no path expression was +given. For example in `(set (read-config).isAdmin false)`, the entire +configuration would be returned, not just `false`. This is slightly different +semantics from most other functions, which would return only the resulting +value (e.g. `(append $foo.list 2)` would not return the entire `$foo` variable, +but only the `list` vector). In that sense, `set` works like `delete`, which +returns the _remaining_ data, not whatever was removed. ## Examples * `(set! $foo 42)` ➜ `42` * `(set! $foo 42) $foo` ➜ `42` * `(set! .global[0].document "new-value")` ➜ `"new-value"` - -Without the bang modifier, `set` is less useful: - -* `(set! $foo 42) (set $foo "new-value") $foo` ➜ `42` +* `(set! $config {a "yes" b "yes"}) (set $config.a "no")` ➜ `{a "yes" b "no"}` ## Forms -### `(set target:symbol value:expression)` ➜ `any` +### `(set target:pathed value:any)` ➜ `any` -* `target` is a symbol. +* `target` is any expression that can have a path expression. * `value` is any expression. - - -`set` evaluates the value and then applies it to the `target`. +`set` evaluates the `value` and then the path expression of `target`. If both +were successfully evaluated, the value is inserted into the target value at +the given path and then the entire target is returned. -If `target` is a variable with no path expression, its value is simply overwritten. -Likewise, a `.` will make `set` overwrite the entire global document. - -If a path expression is present, `set` will only set value deeply nested in the -target value (e.g. `(set $foo.bar 42)` will update the value at the key "bar" -in `$foo`, which is hopefully an object). Even in this case, the _return value_ -of `set` is still the _set_ value (42 in the previous example), not the combined -value. +Note that for variables, the path expression can be empty (e.g. +`(set $foo 42)`). For all other valid targets, a path expression must be set +(e.g. `(set (read-config).field 42)`) because there is no source that could be +overwritten (like with a variable or the global document). Also note that without the bang modifier, all of variable and document changes -are only valid inside the `set` tuple itself and will disappear / not leak -outside. - -If the `value` or the `target` return an error while evaluating, the error is -returned. - -## Context +are only returned, the underlying value is not modified in-place. -`set` evaluates all expressions as their own scopes, so variables from the -`value` expression do not influence the `target` expression's evaluation. +`set!` can only be used with variables and bare path expressions (i.e. the +global document), because there is no logical way to modify the result of a +function call in-place. diff --git a/pkg/builtin/core/docs/delete.md b/pkg/builtin/core/docs/delete.md index ea9c38f..8ace198 100644 --- a/pkg/builtin/core/docs/delete.md +++ b/pkg/builtin/core/docs/delete.md @@ -1,37 +1,40 @@ # delete `delete` removes a key from an object or an item from a vector (shrinking that -vector by 1 element). - - +vector by 1 element). It must be used on any expression that has a path +expression attached (e.g. `(delete (do-something).gone)`). Note that `delete` is, like all functions in Rudi, stateless and so does not -modify the given argument directly. To make your changes "stick", use the bang -modifier: `(delete! $var.foo)` – this only makes sense for symbols (i.e. -variables and the global document), which is incidentally also what the bang -modifier itself already enforces. So an expression like `(delete! [1 2][1])` is -invalid. +modify the given argument directly. That is why `delete` can be used on +non-variables/documents, like in `(delete (read-config).password)` to get rid +of individual keys from a larger data structure. To enable this, `delete` always +returns the _remaining_ datastructure, not the value that was removed. + +To make your changes "stick", use the bang modifier: `(delete! $var.foo)` – this +only makes sense for symbols (i.e. variables and the global document), since +there is no "source" to be updated for e.g. literals (hence +`(delete! [1 2 3][1])` is invalid). [`set`](set.md) is another function that is most often used with the bang -modifier. +modifier and also returns the entire datastructure, not just the newly inserted +value. ## Examples * `(delete! $var.key)` * `(delete! .[1])` - +* `(connect (delete (read-config).isAdmin))` to give `connect` a config object + object without the `isAdmin` flag. ## Forms -### `(delete target:symbol)` ➜ `any` +### `(delete target:pathed)` ➜ `any` -* `target` is a symbol with a path expression. +* `target` is a any expression with a path expression. `delete` evaluates the target expression and then removes whatever the path expression is pointing to. The return value is the remaining data (i.e. not the @@ -39,8 +42,3 @@ removed value). When used with the bang modifier, `target` must be a symbol with a path expression. - -## Context - -`delete` evaluates the expression in its own scope, so variables defined in it -do not leak. diff --git a/pkg/builtin/core/docs/set.md b/pkg/builtin/core/docs/set.md index 07d7351..c71da52 100644 --- a/pkg/builtin/core/docs/set.md +++ b/pkg/builtin/core/docs/set.md @@ -2,7 +2,11 @@ `set` is used to define variables and to update values of variables or the global document. Similar to [`delete`](delete.md) it is most often used with the -bang modifier (`(set! …)`), as modifications "do not stick" otherwise. +bang modifier (`(set! …)`), as modifications "do not stick" otherwise. However +`set` can also be used to modify a datastructure "in transit", like in +`(set! .user.settings (set $defaultSettings.isAdmin false))`, where the inner +function will change a subfield (`isAdmin`) but still return the entire +default settings object. `set` allows both overwriting the entire target value (`(set! $var …)` or `(set! . …)`) as well as setting just a sub element (for example @@ -12,48 +16,40 @@ Variables defined by `set` are scoped and only valid for all following sibling expressions, never the parent. For for example `(if true (set! $x 42)) $x` is invalid, as `$x` only exists in the positive branch of that `if` tuple. -`set` returns the value that was set. +`set` returns the entire target data structure, as if no path expression was +given. For example in `(set (read-config).isAdmin false)`, the entire +configuration would be returned, not just `false`. This is slightly different +semantics from most other functions, which would return only the resulting +value (e.g. `(append $foo.list 2)` would not return the entire `$foo` variable, +but only the `list` vector). In that sense, `set` works like `delete`, which +returns the _remaining_ data, not whatever was removed. ## Examples * `(set! $foo 42)` ➜ `42` * `(set! $foo 42) $foo` ➜ `42` * `(set! .global[0].document "new-value")` ➜ `"new-value"` - -Without the bang modifier, `set` is less useful: - -* `(set! $foo 42) (set $foo "new-value") $foo` ➜ `42` +* `(set! $config {a "yes" b "yes"}) (set $config.a "no")` ➜ `{a "yes" b "no"}` ## Forms -### `(set target:symbol value:expression)` ➜ `any` +### `(set target:pathed value:any)` ➜ `any` -* `target` is a symbol. +* `target` is any expression that can have a path expression. * `value` is any expression. - - -`set` evaluates the value and then applies it to the `target`. +`set` evaluates the `value` and then the path expression of `target`. If both +were successfully evaluated, the value is inserted into the target value at +the given path and then the entire target is returned. -If `target` is a variable with no path expression, its value is simply overwritten. -Likewise, a `.` will make `set` overwrite the entire global document. - -If a path expression is present, `set` will only set value deeply nested in the -target value (e.g. `(set $foo.bar 42)` will update the value at the key "bar" -in `$foo`, which is hopefully an object). Even in this case, the _return value_ -of `set` is still the _set_ value (42 in the previous example), not the combined -value. +Note that for variables, the path expression can be empty (e.g. +`(set $foo 42)`). For all other valid targets, a path expression must be set +(e.g. `(set (read-config).field 42)`) because there is no source that could be +overwritten (like with a variable or the global document). Also note that without the bang modifier, all of variable and document changes -are only valid inside the `set` tuple itself and will disappear / not leak -outside. - -If the `value` or the `target` return an error while evaluating, the error is -returned. - -## Context +are only returned, the underlying value is not modified in-place. -`set` evaluates all expressions as their own scopes, so variables from the -`value` expression do not influence the `target` expression's evaluation. +`set!` can only be used with variables and bare path expressions (i.e. the +global document), because there is no logical way to modify the result of a +function call in-place. diff --git a/pkg/builtin/core/functions.go b/pkg/builtin/core/functions.go index 12eb894..f86114b 100644 --- a/pkg/builtin/core/functions.go +++ b/pkg/builtin/core/functions.go @@ -24,13 +24,13 @@ var ( Functions = types.Functions{ "default": functions.NewBuilder(defaultFunction).WithDescription("returns the default value if the first argument is empty").Build(), - "delete": functions.NewBuilder(deleteFunction).WithBangHandler(deleteBangHandler).WithDescription("removes a key from an object or an item from a vector").Build(), + "delete": functions.NewBuilder(deleteFunction).WithBangHandler(overwriteEverythingBangHandler).WithDescription("removes a key from an object or an item from a vector").Build(), "do": functions.NewBuilder(DoFunction).WithDescription("eval a sequence of statements where only one expression is valid").Build(), "empty?": functions.NewBuilder(isEmptyFunction).WithCoalescer(humaneCoalescer).WithDescription("returns true when the given value is empty-ish (0, false, null, \"\", ...)").Build(), "error": functions.NewBuilder(errorFunction, fmtErrorFunction).WithDescription("returns an error").Build(), "has?": functions.NewBuilder(hasFunction).WithDescription("returns true if the given symbol's path expression points to an existing value").Build(), "if": functions.NewBuilder(ifElseFunction, ifFunction).WithDescription("evaluate one of two expressions based on a condition").Build(), - "set": functions.NewBuilder(setFunction).WithDescription("set a value in a variable/document, only really useful with ! modifier (set!)").Build(), + "set": functions.NewBuilder(setFunction).WithBangHandler(overwriteEverythingBangHandler).WithDescription("set a value in a variable/document, only really useful with ! modifier (set!)").Build(), "try": functions.NewBuilder(tryWithFallbackFunction, tryFunction).WithDescription("returns the fallback if the first expression errors out").Build(), } ) @@ -83,47 +83,14 @@ func DoFunction(ctx types.Context, args ...ast.Expression) (any, error) { } func hasFunction(ctx types.Context, arg ast.Expression) (any, error) { - var ( - expr ast.Expression - pathExpr *ast.PathExpression - ) - - // separate base value expression from the path expression - - if symbol, ok := arg.(ast.Symbol); ok { - pathExpr = symbol.PathExpression - - if symbol.Variable != nil { - symbol.PathExpression = nil - } else { - // for bare path expressions - symbol.PathExpression = &ast.PathExpression{} - } - - expr = symbol - } - - if vectorNode, ok := arg.(ast.VectorNode); ok { - pathExpr = vectorNode.PathExpression - vectorNode.PathExpression = nil - expr = vectorNode - } - - if objectNode, ok := arg.(ast.ObjectNode); ok { - pathExpr = objectNode.PathExpression - objectNode.PathExpression = nil - expr = objectNode - } - - if tuple, ok := arg.(ast.Tuple); ok { - pathExpr = tuple.PathExpression - tuple.PathExpression = nil - expr = tuple + pathed, ok := arg.(ast.Pathed) + if !ok { + return nil, fmt.Errorf("expected datatype that can hold a path expression, got %T", arg) } - if expr == nil { - return nil, fmt.Errorf("expected Symbol, Vector, Object or Tuple, got %T", arg) - } + // separate base value expression from the path expression + pathExpr := pathed.GetPathExpression() + expr := pathed.Pathless() if pathExpr == nil { return nil, errors.New("argument has no path expression") @@ -190,130 +157,137 @@ func tryWithFallbackFunction(ctx types.Context, test ast.Expression, fallback as return result, nil } -// TODO: Allow (set (foo).bar 42) - // (set VAR:Variable VALUE:any) // (set EXPR:PathExpression VALUE:any) -func setFunction(ctx types.Context, target, value ast.Expression) (any, error) { - symbol, ok := target.(ast.Symbol) +func setFunction(ctx types.Context, target ast.Expression, value any) (any, error) { + pathed, ok := target.(ast.Pathed) if !ok { - return nil, fmt.Errorf("argument #0 is not a symbol, but %T", target) + return nil, fmt.Errorf("expected datatype that can hold a path expression, got %T", target) } - // catch symbols that are technically invalid - if symbol.Variable == nil && symbol.PathExpression == nil { - return nil, fmt.Errorf("argument #0: must be path expression or variable, got %s", symbol.ExpressionName()) + // separate base value expression from the path expression + pathExpr := pathed.GetPathExpression() + expr := pathed.Pathless() + + // Make sure (set) calls make sense: For non-symbols, a path must be set, because + // "(set (foo) 42)" or "(set [1 2 3] 42)" do not make sense. For symbols on the other + // hand, we only pre-evaluate the target if a path exists, because we still need to + // allow variables not existing, so that "(set! $var 42)" can succeed. + switch target.(type) { + case ast.Symbol: + // Set relies entirely on the bang modifier handling to actually set values + // in variables or the global document; without the bang modifier, (set) + // is basically a no-op and we do not even have to evaluate the symbol here. + if pathExpr == nil { + return value, nil + } + + case ast.Tuple, ast.VectorNode, ast.ObjectNode: + if pathExpr == nil { + return nil, errors.New("no path expression provided on target value") + } + + default: + return nil, fmt.Errorf("unexpected target value of type %T", target) } - // discard any context changes within the newValue expression - _, newValue, err := ctx.Runtime().EvalExpression(ctx, value) + // pre-evaluate the path (assuming it's cheaper to calculate than the main expression) + evaluatedPath, err := pathexpr.Eval(ctx, pathExpr) if err != nil { - return nil, fmt.Errorf("argument #1: %w", err) + return nil, fmt.Errorf("invalid path expression: %w", err) } - // Set relies entirely on the bang modifier handling to actually set values - // in variables or the global document; without the bang modifier, (set) - // is basically a no-op. + // evaluate the target + _, targetValue, err := ctx.Runtime().EvalExpression(ctx, expr) + if err != nil { + return nil, err + } - return newValue, nil -} + // we need to operate on a _copy_ of the value, as updating happens in the bang handler later + // on; this is only necessary for symbols though, as functions (tuples) are expected to return + // non-pointer data and vector/objects nodes are literals. + if _, ok := target.(ast.Symbol); ok { + targetValue, err = deepcopy.Clone(targetValue) + if err != nil { + return nil, fmt.Errorf("invalid current value: %w", err) + } + } -// TODO: Shouldn't (delete (map $obj to-upper).key) also work, i.e. not just -// symbols? Symbols are only important for the bang handler, which checks it -// independently. + return jsonpath.Set(targetValue, jsonpath.FromEvaluatedPath(*evaluatedPath), value) +} -// (delete VAR:Variable) -// (delete EXPR:PathExpression) -func deleteFunction(ctx types.Context, expr ast.Expression) (any, error) { - symbol, ok := expr.(ast.Symbol) +// (delete TARGET:Pathed) +func deleteFunction(ctx types.Context, target ast.Expression) (any, error) { + pathed, ok := target.(ast.Pathed) if !ok { - return nil, fmt.Errorf("argument #0 is not a symbol, but %T", expr) + return nil, fmt.Errorf("argument is not a type with path expression, but %T", target) } - // catch symbols that are technically invalid - if symbol.PathExpression == nil { - return nil, fmt.Errorf("argument #0: must be path expression, got %s", symbol.ExpressionName()) + // separate base value expression from the path expression + pe := pathed.GetPathExpression() + if pe == nil { + return nil, errors.New("empty path expression") } // pre-evaluate the path - pathExpr, err := pathexpr.Eval(ctx, symbol.PathExpression) + pathExpr, err := pathexpr.Eval(ctx, pe) if err != nil { - return nil, fmt.Errorf("argument #0: invalid path expression: %w", err) - } - - // get the current value - var currentValue any - - if symbol.Variable != nil { - varName := string(*symbol.Variable) - - // a non-existing variable is fine, this is how you define new variables in the first place - currentValue, _ = ctx.GetVariable(varName) - } else { - currentValue = ctx.GetDocument().Data() + return nil, fmt.Errorf("invalid path expression: %w", err) } - // we need to operate on a _copy_ of the value and then, if need be, rely on the BangHandler - // to make the actual deletion happen and stick. - currentValue, err = deepcopy.Clone(currentValue) + // evaluate the target + _, targetValue, err := ctx.Runtime().EvalExpression(ctx, pathed.Pathless()) if err != nil { - return nil, fmt.Errorf("invalid current value: %w", err) + return nil, err } - // delete the desired path in the value - updatedValue, err := jsonpath.Delete(currentValue, jsonpath.FromEvaluatedPath(*pathExpr)) - if err != nil { - return nil, fmt.Errorf("cannot delete %s in %T: %w", pathExpr, currentValue, err) + // we need to operate on a _copy_ of the value, as updating happens in the bang handler later + // on; this is only necessary for symbols though, as functions (tuples) are expected to return + // non-pointer data and vector/objects nodes are literals. + if _, ok := target.(ast.Symbol); ok { + targetValue, err = deepcopy.Clone(targetValue) + if err != nil { + return nil, fmt.Errorf("invalid current value: %w", err) + } } - return updatedValue, nil + // delete the desired path in the value + return jsonpath.Delete(targetValue, jsonpath.FromEvaluatedPath(*pathExpr)) } -func deleteBangHandler(ctx types.Context, originalArgs []ast.Expression, value any) (types.Context, any, error) { +// overwriteEverythingBangHandler is the custom BangHandler for the set and delete functions. +// Normally, function calls like "(append $obj.list 1)" would not return the entire object, but only +// the function result (in this case, a vector with one more element added to it). This is because +// for most functions, the path expression is evaluated before the argument is created and the +// append function is called (i.e. append doesn't even see that its first argument originates from +// $obj.list). +// If set behaved the same way, "(set $obj.value 42)" would return 42, not the entire object. That +// is not really helpful though. If you wanted to take an object and just update one value in it, +// you'd be forced to use a temporary variable ("(set! $o ....) (set! $o.value 42) $o"). +// Because of this, set returns the whole updated data structure, so in the example above, the +// entire $obj. It can do this because the first argument is not pre-evaluated by the Rudi runtime, +// but passed as a raw expression (like for delete). +// All of this applies equally to the delete function, hence both share the same bang handler. +// Since this behaviour makes set different from other regular functions, it needs a custom +// BangHandler. +func overwriteEverythingBangHandler(ctx types.Context, originalArgs []ast.Expression, value any) (types.Context, any, error) { if len(originalArgs) == 0 { return ctx, nil, errors.New("must have at least 1 symbol argument") } firstArg := originalArgs[0] - sym, ok := firstArg.(ast.Symbol) + symbol, ok := firstArg.(ast.Symbol) if !ok { return ctx, nil, fmt.Errorf("must use Symbol as first argument, got %T", firstArg) } - updatedValue := value - - // if the symbol has a path to traverse, do so - if sym.PathExpression != nil { - // pre-evaluate the path expression - pathExpr, err := pathexpr.Eval(ctx, sym.PathExpression) - if err != nil { - return ctx, nil, fmt.Errorf("argument #0: invalid path expression: %w", err) - } - - // get the current value of the symbol - var currentValue any - - if sym.Variable != nil { - varName := string(*sym.Variable) - - // a non-existing variable is fine, this is how you define new variables in the first place - currentValue, _ = ctx.GetVariable(varName) - } else { - currentValue = ctx.GetDocument().Data() - } - - // apply the path expression - updatedValue, err = jsonpath.Delete(currentValue, jsonpath.FromEvaluatedPath(*pathExpr)) - if err != nil { - return ctx, nil, fmt.Errorf("cannot set value in %T at %s: %w", currentValue, pathExpr, err) - } - } - - if sym.Variable != nil { - varName := string(*sym.Variable) - ctx = ctx.WithVariable(varName, updatedValue) + // Since set always returns the entire data structure, all we must do here is to + // update the target, ignoring the path expression on the symbol. + if symbol.Variable != nil { + varName := string(*symbol.Variable) + ctx = ctx.WithVariable(varName, value) } else { - ctx.GetDocument().Set(updatedValue) + ctx.GetDocument().Set(value) } return ctx, value, nil diff --git a/pkg/builtin/core/functions_test.go b/pkg/builtin/core/functions_test.go index a8a7a70..5bf70ca 100644 --- a/pkg/builtin/core/functions_test.go +++ b/pkg/builtin/core/functions_test.go @@ -168,7 +168,16 @@ func TestSetFunction(t *testing.T) { { Expression: `(set! $obj.aList[1] "new value")`, Variables: testVariables(), - Expected: "new value", + Expected: map[string]any{ + "aString": "foo", + "aList": []any{"first", "new value", "third"}, + "aBool": true, + "anObject": map[string]any{ + "key1": true, + "key2": nil, + "key3": []any{9, map[string]any{"foo": "bar"}, 7}, + }, + }, }, { Expression: `(set! $obj.aList[1] "new value") $obj`, @@ -212,7 +221,7 @@ func TestSetFunction(t *testing.T) { // do not accidentally set a key without creating a new context { Expression: `(set! $a {foo "bar"}) (if true (set! $a.foo "updated"))`, - Expected: "updated", + Expected: map[string]any{"foo": "updated"}, }, { Expression: `(set! $a {foo "bar"}) (if true (set! $a.foo "updated")) $a.foo`, @@ -235,8 +244,17 @@ func TestSetFunction(t *testing.T) { // update a key within an object variable { Expression: `(set! $obj.aString "new value")`, - Expected: "new value", - Variables: testVariables(), + Expected: map[string]any{ + "aString": "new value", + "aList": []any{"first", 2, "third"}, + "aBool": true, + "anObject": map[string]any{ + "key1": true, + "key2": nil, + "key3": []any{9, map[string]any{"foo": "bar"}, 7}, + }, + }, + Variables: testVariables(), }, { Expression: `(set! $obj.aString "new value") $obj.aString`, @@ -246,8 +264,18 @@ func TestSetFunction(t *testing.T) { // add a new sub key { Expression: `(set! $obj.newKey "new value")`, - Expected: "new value", - Variables: testVariables(), + Expected: map[string]any{ + "aString": "foo", + "aList": []any{"first", 2, "third"}, + "aBool": true, + "anObject": map[string]any{ + "key1": true, + "key2": nil, + "key3": []any{9, map[string]any{"foo": "bar"}, 7}, + }, + "newKey": "new value", + }, + Variables: testVariables(), }, { Expression: `(set! $obj.newKey "new value") $obj.newKey`, @@ -315,6 +343,15 @@ func TestSetFunction(t *testing.T) { }, }, }, + // modify data in transit + { + Expression: `(set (set $foo [1 2 3])[1] 4)`, + Expected: []any{int64(1), int64(4), int64(3)}, + }, + { + Expression: `(set (set $foo {foo "bar"}).foo 4)`, + Expected: map[string]any{"foo": int64(4)}, + }, } for _, testcase := range testcases { @@ -351,14 +388,12 @@ func TestDeleteFunction(t *testing.T) { Invalid: true, }, { - // TODO: This should be valid. Expression: `(delete [1 2 3][1])`, - Invalid: true, + Expected: []any{int64(1), int64(3)}, }, { - // TODO: This should be valid. - Expression: `(delete {foo "bar"}.foo)`, - Invalid: true, + Expression: `(delete {foo "bar" hei "verden"}.foo)`, + Expected: map[string]any{"hei": "verden"}, }, // allow removing everything { diff --git a/pkg/lang/ast/types.go b/pkg/lang/ast/types.go index 0b645c9..9ea7579 100644 --- a/pkg/lang/ast/types.go +++ b/pkg/lang/ast/types.go @@ -24,6 +24,18 @@ type Expression interface { ExpressionName() string } +type Pathed interface { + Expression + + // GetPathExpression returns the optional path expression. Just because a type can hold + // a path expression does not mean one is always set. + GetPathExpression() *PathExpression + + // Pathless returns a shallow copy with the path expression omitted, e.g. + // turning a "(foo).bar" into "(foo)". + Pathless() Pathed +} + // A program is either a series of statements or a single, non-tuple expression. type Program struct { Statements []Statement @@ -41,16 +53,7 @@ func (p Program) String() string { } func (p Program) ExpressionName() string { - var name string - - switch { - case len(p.Statements) > 0: - name = "Statements" - default: - name = "?" - } - - return "Program(" + name + ")" + return "Program" } type Statement struct { @@ -77,6 +80,7 @@ type Symbol struct { } var _ Expression = Symbol{} +var _ Pathed = Symbol{} func (s Symbol) IsDot() bool { return s.Variable == nil && s.PathExpression != nil && s.PathExpression.IsIdentity() @@ -122,12 +126,30 @@ func (s Symbol) ExpressionName() string { return "Symbol(" + name + ")" } +func (s Symbol) GetPathExpression() *PathExpression { + return s.PathExpression +} + +func (s Symbol) Pathless() Pathed { + if s.Variable != nil { + return Symbol{ + Variable: s.Variable, + } + } + + // for bare path expressions + return Symbol{ + PathExpression: &PathExpression{}, + } +} + type Tuple struct { Expressions []Expression PathExpression *PathExpression } var _ Expression = Tuple{} +var _ Pathed = Tuple{} func (t Tuple) String() string { path := "" @@ -147,6 +169,16 @@ func (Tuple) ExpressionName() string { return "Tuple" } +func (t Tuple) GetPathExpression() *PathExpression { + return t.PathExpression +} + +func (t Tuple) Pathless() Pathed { + return Tuple{ + Expressions: t.Expressions, + } +} + // VectorNode represents the parsed code for constructing an vector. // When an VectorNode is evaluated, it turns into an Vector. type VectorNode struct { @@ -155,6 +187,7 @@ type VectorNode struct { } var _ Expression = VectorNode{} +var _ Pathed = VectorNode{} func (v VectorNode) String() string { path := "" @@ -174,6 +207,16 @@ func (VectorNode) ExpressionName() string { return "Vector" } +func (v VectorNode) GetPathExpression() *PathExpression { + return v.PathExpression +} + +func (v VectorNode) Pathless() Pathed { + return VectorNode{ + Expressions: v.Expressions, + } +} + // ObjectNode represents the parsed code for constructing an object. // When an ObjectNode is evaluated, it turns into an Object. type ObjectNode struct { @@ -182,6 +225,7 @@ type ObjectNode struct { } var _ Expression = ObjectNode{} +var _ Pathed = ObjectNode{} func (o ObjectNode) String() string { pairs := make([]string, len(o.Data)) @@ -195,6 +239,16 @@ func (ObjectNode) ExpressionName() string { return "Object" } +func (o ObjectNode) GetPathExpression() *PathExpression { + return o.PathExpression +} + +func (o ObjectNode) Pathless() Pathed { + return ObjectNode{ + Data: o.Data, + } +} + type KeyValuePair struct { Key Expression Value Expression diff --git a/pkg/lang/ast/types_test.go b/pkg/lang/ast/types_test.go index ddd69ed..be905b7 100644 --- a/pkg/lang/ast/types_test.go +++ b/pkg/lang/ast/types_test.go @@ -24,7 +24,7 @@ func TestExpressionNames(t *testing.T) { { // technically invalid expr: Program{}, - expected: "Program(?)", + expected: "Program", }, { expr: Program{ @@ -32,7 +32,7 @@ func TestExpressionNames(t *testing.T) { Expression: Null{}, }}, }, - expected: "Program(Statements)", + expected: "Program", }, { expr: Statement{},