diff --git a/examples/gno.land/p/demo/json/buffer.gno b/examples/gno.land/p/demo/json/buffer.gno index 23fb53fb0ea..d726ffadc7d 100644 --- a/examples/gno.land/p/demo/json/buffer.gno +++ b/examples/gno.land/p/demo/json/buffer.gno @@ -3,7 +3,6 @@ package json import ( "errors" "io" - "strings" "gno.land/p/demo/ufmt" ) @@ -28,6 +27,10 @@ func newBuffer(data []byte) *buffer { } } +func (b *buffer) reset() { + b.last = GO +} + // first retrieves the first non-whitespace (or other escaped) character in the buffer. func (b *buffer) first() (byte, error) { for ; b.index < b.length; b.index++ { @@ -122,16 +125,7 @@ func (b *buffer) skipAny(endTokens map[byte]bool) error { b.index++ } - // build error message - var tokens []string - for token := range endTokens { - tokens = append(tokens, string(token)) - } - - return ufmt.Errorf( - "EOF reached before encountering one of the expected tokens: %s", - strings.Join(tokens, ", "), - ) + return io.EOF } // skipAndReturnIndex moves the buffer index forward by one and returns the new index. @@ -175,9 +169,9 @@ var significantTokens = map[byte]bool{ // filterTokens stores the filter expression tokens. var filterTokens = map[byte]bool{ - aesterisk: true, // wildcard - andSign: true, - orSign: true, + asterisk: true, // wildcard + andSign: true, + orSign: true, } // skipToNextSignificantToken advances the buffer index to the next significant character. @@ -219,121 +213,6 @@ func (b *buffer) backslash() bool { return count%2 != 0 } -// numIndex holds a map of valid numeric characters -var numIndex = map[byte]bool{ - '0': true, - '1': true, - '2': true, - '3': true, - '4': true, - '5': true, - '6': true, - '7': true, - '8': true, - '9': true, - '.': true, - 'e': true, - 'E': true, -} - -// pathToken checks if the current token is a valid JSON path token. 
-func (b *buffer) pathToken() error { - var stack []byte - - inToken := false - inNumber := false - first := b.index - - for b.index < b.length { - c := b.data[b.index] - - switch { - case c == doubleQuote || c == singleQuote: - inToken = true - if err := b.step(); err != nil { - return errors.New("error stepping through buffer") - } - - if err := b.skip(c); err != nil { - return errors.New("unmatched quote in path") - } - - if b.index >= b.length { - return errors.New("unmatched quote in path") - } - - case c == bracketOpen || c == parenOpen: - inToken = true - stack = append(stack, c) - - case c == bracketClose || c == parenClose: - inToken = true - if len(stack) == 0 || (c == bracketClose && stack[len(stack)-1] != bracketOpen) || (c == parenClose && stack[len(stack)-1] != parenOpen) { - return errors.New("mismatched bracket or parenthesis") - } - - stack = stack[:len(stack)-1] - - case pathStateContainsValidPathToken(c): - inToken = true - - case c == plus || c == minus: - if inNumber || (b.index > 0 && numIndex[b.data[b.index-1]]) { - inToken = true - } else if !inToken && (b.index+1 < b.length && numIndex[b.data[b.index+1]]) { - inToken = true - inNumber = true - } else if !inToken { - return errors.New("unexpected operator at start of token") - } - - default: - if len(stack) != 0 || inToken { - inToken = true - } else { - goto end - } - } - - b.index++ - } - -end: - if len(stack) != 0 { - return errors.New("unclosed bracket or parenthesis at end of path") - } - - if first == b.index { - return errors.New("no token found") - } - - if inNumber && !numIndex[b.data[b.index-1]] { - inNumber = false - } - - return nil -} - -func pathStateContainsValidPathToken(c byte) bool { - if _, ok := significantTokens[c]; ok { - return true - } - - if _, ok := filterTokens[c]; ok { - return true - } - - if _, ok := numIndex[c]; ok { - return true - } - - if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' { - return true - } - - return false -} - func (b *buffer) numeric(token 
bool) error { if token { b.last = GO @@ -483,3 +362,121 @@ func numberKind2f64(value interface{}) (result float64, err error) { return } + +// numIndex holds a map of valid numeric characters +var numIndex = map[byte]bool{ + '0': true, + '1': true, + '2': true, + '3': true, + '4': true, + '5': true, + '6': true, + '7': true, + '8': true, + '9': true, + '.': true, + 'e': true, + 'E': true, +} + +// pathToken checks if the current token is a valid JSON path token. +func (b *buffer) pathToken() error { + var stack []byte + + inToken := false + inNumber := false + first := b.index + + for b.index < b.length { + c := b.data[b.index] + + switch { + case c == singleQuote: + fallthrough + + case c == doubleQuote: + inToken = true + if err := b.step(); err != nil { + return errors.New("error stepping through buffer") + } + + if err := b.skip(c); err != nil { + return errors.New("unmatched quote in path") + } + + if b.index >= b.length { + return errors.New("unmatched quote in path") + } + + case c == bracketOpen || c == parenOpen: + inToken = true + stack = append(stack, c) + + case c == bracketClose || c == parenClose: + inToken = true + if len(stack) == 0 || (c == bracketClose && stack[len(stack)-1] != bracketOpen) || (c == parenClose && stack[len(stack)-1] != parenOpen) { + return errors.New("mismatched bracket or parenthesis") + } + + stack = stack[:len(stack)-1] + + case pathStateContainsValidPathToken(c): + inToken = true + + case c == plus || c == minus: + if inNumber || (b.index > 0 && numIndex[b.data[b.index-1]]) { + inToken = true + } else if !inToken && (b.index+1 < b.length && numIndex[b.data[b.index+1]]) { + inToken = true + inNumber = true + } else if !inToken { + return errors.New("unexpected operator at start of token") + } + + default: + if len(stack) != 0 || inToken { + inToken = true + } else { + goto end + } + } + + b.index++ + } + +end: + if len(stack) != 0 { + return errors.New("unclosed bracket or parenthesis at end of path") + } + + if first == 
b.index { + return errors.New("no token found") + } + + if inNumber && !numIndex[b.data[b.index-1]] { + inNumber = false + } + + return nil +} + +func pathStateContainsValidPathToken(c byte) bool { + if _, ok := significantTokens[c]; ok { + return true + } + + if _, ok := filterTokens[c]; ok { + return true + } + + if _, ok := numIndex[c]; ok { + return true + } + + if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' { + return true + } + + return false +} diff --git a/examples/gno.land/p/demo/json/buffer_test.gno b/examples/gno.land/p/demo/json/buffer_test.gno index b8dce390a61..2569385197e 100644 --- a/examples/gno.land/p/demo/json/buffer_test.gno +++ b/examples/gno.land/p/demo/json/buffer_test.gno @@ -1,6 +1,9 @@ package json -import "testing" +import ( + "io" + "testing" +) func TestBufferCurrent(t *testing.T) { tests := []struct { diff --git a/examples/gno.land/p/demo/json/node.gno b/examples/gno.land/p/demo/json/node.gno index 1e71a101e62..e6f8648ba10 100644 --- a/examples/gno.land/p/demo/json/node.gno +++ b/examples/gno.land/p/demo/json/node.gno @@ -56,6 +56,23 @@ func NewNode(prev *Node, b *buffer, typ ValueType, key **string) (*Node, error) return curr, nil } +func valueNode(prev *Node, key string, typ ValueType, value interface{}) *Node { + curr := &Node{ + prev: prev, + data: nil, + borders: [2]int{0, 0}, + key: &key, + nodeType: typ, + modified: false, + } + + if curr.value != nil { + curr.value = value + } + + return curr +} + // load retrieves the value of the current node. func (n *Node) load() interface{} { return n.value @@ -480,31 +497,49 @@ func ObjectNode(key string, value map[string]*Node) *Node { // IsArray returns true if the current node is array type. func (n *Node) IsArray() bool { + if n == nil { + return false + } return n.nodeType == Array } // IsObject returns true if the current node is object type. 
func (n *Node) IsObject() bool { + if n == nil { + return false + } return n.nodeType == Object } // IsNull returns true if the current node is null type. func (n *Node) IsNull() bool { + if n == nil { + return false + } return n.nodeType == Null } // IsBool returns true if the current node is boolean type. func (n *Node) IsBool() bool { + if n == nil { + return false + } return n.nodeType == Boolean } // IsString returns true if the current node is string type. func (n *Node) IsString() bool { + if n == nil { + return false + } return n.nodeType == String } // IsNumber returns true if the current node is number type. func (n *Node) IsNumber() bool { + if n == nil { + return false + } return n.nodeType == Number } @@ -1081,3 +1116,73 @@ func Must(root *Node, expect error) *Node { return root } + +func (n *Node) Keys() []string { + if n == nil { + return nil + } + result := make([]string, 0, len(n.next)) + for key := range n.next { + result = append(result, key) + } + return result +} + +func (n *Node) getSortedChildren() (result []*Node) { + if n == nil { + return nil + } + + size := len(n.next) + if n.IsObject() { + result = make([]*Node, size) + keys := n.Keys() + + // sort keys in ascending order + for i := 1; i < len(keys); i++ { + key := keys[i] + j := i - 1 + for j >= 0 && keys[j] > key { + keys[j+1] = keys[j] + j-- + } + keys[j+1] = key + } + + for i, key := range keys { + result[i] = n.next[key] + } + } else if n.IsArray() { + result = make([]*Node, size) + for _, elem := range n.next { + result[*elem.index] = elem + } + } + + return result +} + +func deepEqual(n1, n2 *Node) bool { + if n1 == nil && n2 == nil { + return true + } + if n1 == nil || n2 == nil { + return false + } + if n1.nodeType != n2.nodeType { + return false + } + if n1.value != n2.value { + return false + } + if len(n1.next) != len(n2.next) { + return false + } + for key, child1 := range n1.next { + child2, ok := n2.next[key] + if !ok || !deepEqual(child1, child2) { + return false + } + } 
+ return true +} diff --git a/examples/gno.land/p/demo/json/node_test.gno b/examples/gno.land/p/demo/json/node_test.gno index dbc82369f68..819923bd4f7 100644 --- a/examples/gno.land/p/demo/json/node_test.gno +++ b/examples/gno.land/p/demo/json/node_test.gno @@ -1,7 +1,6 @@ package json import ( - "bytes" "sort" "strconv" "strings" @@ -73,7 +72,7 @@ func TestNode_CreateNewNode(t *testing.T) { return } - if !compareNodes(got, tt.expectCurr) { + if !deepEqual(got, tt.expectCurr) { t.Errorf("%s got = %v, want %v", tt.name, got, tt.expectCurr) } }) @@ -300,40 +299,227 @@ func TestNode_GetBool_Fail(t *testing.T) { } } -func TestNode_IsBool(t *testing.T) { - tests := []simpleNode{ - {"true", BoolNode("", true)}, - {"false", BoolNode("", false)}, +func TestNode_IsBool_With_Unmarshal(t *testing.T) { + tests := []struct { + name string + json []byte + want bool + }{ + {"true", []byte("true"), true}, + {"false", []byte("false"), true}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if !tt.node.IsBool() { + root, err := Unmarshal(tt.json) + if err != nil { + t.Errorf("Error on Unmarshal(): %s", err.Error()) + } + + if root.IsBool() != tt.want { t.Errorf("%s should be a bool", tt.name) } }) } } -func TestNode_IsBool_With_Unmarshal(t *testing.T) { +func TestNode_IsString(t *testing.T) { tests := []struct { name string - json []byte + node *Node want bool }{ - {"true", []byte("true"), true}, - {"false", []byte("false"), true}, + { + name: "String node", + node: &Node{nodeType: String}, + want: true, + }, + { + name: "Non-string node", + node: &Node{nodeType: Number}, + want: false, + }, + { + name: "Nil node", + node: nil, + want: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - root, err := Unmarshal(tt.json) - if err != nil { - t.Errorf("Error on Unmarshal(): %s", err.Error()) + if got := tt.node.IsString(); got != tt.want { + t.Errorf("Node.IsString() = %v, want %v", got, tt.want) } + }) + } +} - if root.IsBool() != tt.want { - 
t.Errorf("%s should be a bool", tt.name) +func TestNode_IsNumber(t *testing.T) { + tests := []struct { + name string + node *Node + want bool + }{ + { + name: "Number node", + node: &Node{nodeType: Number}, + want: true, + }, + { + name: "Non-number node", + node: &Node{nodeType: String}, + want: false, + }, + { + name: "Nil node", + node: nil, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.node.IsNumber(); got != tt.want { + t.Errorf("Node.IsNumber() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestNode_IsBool(t *testing.T) { + tests := []struct { + name string + node *Node + want bool + }{ + { + name: "Bool node", + node: &Node{nodeType: Boolean}, + want: true, + }, + { + name: "Non-bool node", + node: &Node{nodeType: String}, + want: false, + }, + { + name: "Nil node", + node: nil, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.node.IsBool(); got != tt.want { + t.Errorf("Node.IsBool() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestNode_IsNull(t *testing.T) { + tests := []struct { + name string + node *Node + want bool + }{ + { + name: "Null node", + node: &Node{nodeType: Null}, + want: true, + }, + { + name: "Non-null node", + node: &Node{nodeType: String}, + want: false, + }, + { + name: "Nil node", + node: nil, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.node.IsNull(); got != tt.want { + t.Errorf("Node.IsNull() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestNode_IsArray(t *testing.T) { + tests := []struct { + name string + node *Node + want bool + }{ + { + name: "Array node", + node: &Node{nodeType: Array}, + want: true, + }, + { + name: "Non-array node", + node: &Node{nodeType: String}, + want: false, + }, + { + name: "Nil node", + node: nil, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := 
tt.node.IsArray(); got != tt.want { + t.Errorf("Node.IsArray() = %v, want %v", got, tt.want) + } + }) + } + + root, err := Unmarshal(sampleArr) + if err != nil { + t.Errorf("Error on Unmarshal(): %s", err) + return + } + + if root.Type() != Array { + t.Errorf(ufmt.Sprintf("Must be an array. got: %s", root.Type().String())) + } +} + +func TestNode_IsObject(t *testing.T) { + tests := []struct { + name string + node *Node + want bool + }{ + { + name: "Object node", + node: &Node{nodeType: Object}, + want: true, + }, + { + name: "Non-object node", + node: &Node{nodeType: String}, + want: false, + }, + { + name: "Nil node", + node: nil, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.node.IsObject(); got != tt.want { + t.Errorf("Node.IsObject() = %v, want %v", got, tt.want) } }) } @@ -593,18 +779,6 @@ func TestNode_GetArray_Fail(t *testing.T) { } } -func TestNode_IsArray(t *testing.T) { - root, err := Unmarshal(sampleArr) - if err != nil { - t.Errorf("Error on Unmarshal(): %s", err) - return - } - - if root.Type() != Array { - t.Errorf(ufmt.Sprintf("Must be an array. 
got: %s", root.Type().String())) - } -} - func TestNode_ArrayEach(t *testing.T) { tests := []struct { name string @@ -1344,49 +1518,3 @@ func isSameObject(a, b string) bool { return true } - -func compareNodes(n1, n2 *Node) bool { - if n1 == nil || n2 == nil { - return n1 == n2 - } - - if n1.key != n2.key { - return false - } - - if !bytes.Equal(n1.data, n2.data) { - return false - } - - if n1.index != n2.index { - return false - } - - if n1.borders != n2.borders { - return false - } - - if n1.modified != n2.modified { - return false - } - - if n1.nodeType != n2.nodeType { - return false - } - - if !compareNodes(n1.prev, n2.prev) { - return false - } - - if len(n1.next) != len(n2.next) { - return false - } - - for k, v := range n1.next { - if !compareNodes(v, n2.next[k]) { - return false - } - } - - return true -} diff --git a/examples/gno.land/p/demo/json/parser.gno b/examples/gno.land/p/demo/json/parser.gno index 9a2c3a8c817..4efd9484e89 100644 --- a/examples/gno.land/p/demo/json/parser.gno +++ b/examples/gno.land/p/demo/json/parser.gno @@ -4,6 +4,7 @@ import ( "bytes" "errors" "strconv" + "unicode" el "gno.land/p/demo/json/eisel_lemire" ) @@ -82,48 +83,6 @@ func ParseFloatLiteral(bytes []byte) (float64, error) { return f, nil } -func ParseIntLiteral(bytes []byte) (int64, error) { - if len(bytes) == 0 { - return 0, errors.New("JSON Error: empty byte slice found while parsing integer value") - } - - neg, bytes := trimNegativeSign(bytes) - - var n uint64 = 0 - for _, c := range bytes { - if notDigit(c) { - return 0, errors.New("JSON Error: non-digit characters found while parsing integer value") - } - - if n > maxUint64/10 { - return 0, errors.New("JSON Error: numeric value exceeds the range limit") - } - - n *= 10 - - n1 := n + uint64(c-'0') - if n1 < n { - return 0, errors.New("JSON Error: numeric value exceeds the range limit") - } - - n = n1 - } - - if n > maxInt64 { - if neg && n == absMinInt64 { - return -absMinInt64, nil - } - - return 0, errors.New("JSON 
Error: numeric value exceeds the range limit") - } - - if neg { - return -int64(n), nil - } - - return int64(n), nil -} - // extractMantissaAndExp10 parses a byte slice representing a decimal number and extracts the mantissa and the exponent of its base-10 representation. // It iterates through the bytes, constructing the mantissa by treating each byte as a digit. // If a decimal point is encountered, the function keeps track of the position of the decimal point to calculate the exponent. @@ -147,7 +106,7 @@ func extractMantissaAndExp10(bytes []byte) (uint64, int, error) { continue } - if notDigit(c) { + if !unicode.IsDigit(rune(c)) { return 0, 0, errors.New("JSON Error: non-digit characters found while parsing integer value") } @@ -175,11 +134,11 @@ func trimNegativeSign(bytes []byte) (bool, []byte) { return false, bytes } -func notDigit(c byte) bool { - return (c & 0xF0) != 0x30 -} - // lower converts a byte to lower case if it is an uppercase letter. func lower(c byte) byte { return c | 0x20 } + +func isAlphaNumeric(c byte) bool { + return unicode.IsLetter(rune(c)) || unicode.IsDigit(rune(c)) +} diff --git a/examples/gno.land/p/demo/json/parser_test.gno b/examples/gno.land/p/demo/json/parser_test.gno index 078aa048a61..e4d290e4ead 100644 --- a/examples/gno.land/p/demo/json/parser_test.gno +++ b/examples/gno.land/p/demo/json/parser_test.gno @@ -151,38 +151,3 @@ func TestParseFloat_May_Interoperability_Problem(t *testing.T) { }) } } - -func TestParseIntLiteral(t *testing.T) { - tests := []struct { - input string - expected int64 - }{ - {"0", 0}, - {"1", 1}, - {"-1", -1}, - {"12345", 12345}, - {"-12345", -12345}, - {"9223372036854775807", 9223372036854775807}, - {"-9223372036854775808", -9223372036854775808}, - {"-92233720368547758081", 0}, - {"18446744073709551616", 0}, - {"9223372036854775808", 0}, - {"-9223372036854775809", 0}, - {"", 0}, - {"abc", 0}, - {"12345x", 0}, - {"123e5", 0}, - {"9223372036854775807x", 0}, - {"27670116110564327410", 0}, - 
{"-27670116110564327410", 0}, - } - - for _, tt := range tests { - t.Run(tt.input, func(t *testing.T) { - got, _ := ParseIntLiteral([]byte(tt.input)) - if got != tt.expected { - t.Errorf("ParseIntLiteral(%s): got %v, want %v", tt.input, got, tt.expected) - } - }) - } -} diff --git a/examples/gno.land/p/demo/json/path.gno b/examples/gno.land/p/demo/json/path.gno index 31f7e04633f..6048148c4f6 100644 --- a/examples/gno.land/p/demo/json/path.gno +++ b/examples/gno.land/p/demo/json/path.gno @@ -1,11 +1,73 @@ package json import ( + "crypto/sha256" + "encoding/hex" "errors" + "io" + "math" + "strconv" + "strings" + + "gno.land/p/demo/ufmt" +) + +var ( + errUnexpectedEOF = errors.New("unexpected EOF") + errUnexpectedChar = errors.New("unexpected character") + errStringNotClosed = errors.New("string not closed") + errBracketNotClosed = errors.New("bracket not closed") + errInvalidSlicePathSyntax = errors.New("invalid slice path syntax") + errInvalidSliceFromValue = errors.New("invalid slice from value") + errInvalidSliceToValue = errors.New("invalid slice to value") + errInvalidSliceStepValue = errors.New("invalid slice step value") ) -// ParsePath takes a JSONPath string and returns a slice of strings representing the path segments. -func ParsePath(path string) ([]string, error) { +// caching nodes to avoid unmarshalling the same JSON data multiple times +var cacheNode = make(map[string]*Node) + +// generateCacheKey creates a hash of the data to be used as a cache key. +func generateCacheKey(data []byte) string { + hash := sha256.Sum256(data) + return hex.EncodeToString(hash[:]) +} + +// Path returns the nodes that match the given JSON path. 
+func Path(data []byte, path string) ([]*Node, error) { + commands, err := parsePath(path) + if err != nil { + return nil, ufmt.Errorf("failed to parse path: %v", err) + } + + dataKey := generateCacheKey(data) + nodes, ok := cacheNode[dataKey] + + if !ok { + nodes, err = Unmarshal(data) + if err != nil { + return nil, ufmt.Errorf("failed to unmarshal JSON: %v", err) + } + cacheNode[dataKey] = nodes + } + + return applyPath(nodes, commands) +} + +// parsePath parses the given path string and returns a slice of commands to be run. +// +// The function uses a state machine approach to parse the path based on the encountered tokens. +// It supports the following tokens and their corresponding states: +// - Dollar sign ('$'): Appends a literal "$" to the result slice. +// - Dot ('.'): Calls the processDot function to handle the dot token. +// - Single dot ('.') followed by a child end character: Appends the substring between the dots to the result slice. +// - Double dot ('..'): Appends ".." to the result slice. +// - Opening bracket ('['): Calls the processBracketOpen function to handle the opening bracket token. +// - Single quote ('”') after the opening bracket: Calls the processSingleQuote function to handle the string within single quotes. +// - Any other character after the opening bracket: Calls the processWithoutSingleQuote function to handle the string without single quotes. +// +// The function returns the slice of parsed commands and any error encountered during the parsing process. +// If an unexpected character is encountered, an error (errUnexpectedChar) is returned. 
+func parsePath(path string) ([]string, error) { buf := newBuffer([]byte(path)) result := make([]string, 0) @@ -14,65 +76,469 @@ func ParsePath(path string) ([]string, error) { if err != nil { break } + switch b { + case dollarSign: + result = append(result, "$") + case atSign: + result = append(result, "@") + case dot: + result, err = processDot(buf, result) + if err != nil { + return nil, err + } + case bracketOpen: + result, err = processBracketOpen(buf, result) + if err != nil { + return nil, err + } + default: + return nil, errUnexpectedChar + } - switch { - case b == dollarSign || b == atSign: - result = append(result, string(b)) - buf.step() + err = buf.step() + if err != nil && err != io.EOF { + return nil, err + } + } - case b == dot: - buf.step() + return result, nil +} - if next, _ := buf.current(); next == dot { - buf.step() - result = append(result, "..") +func applyPath(node *Node, cmds []string) ([]*Node, error) { + result := make([]*Node, 0) - extractNextSegment(buf, &result) - } else { - extractNextSegment(buf, &result) - } + for i, cmd := range cmds { + if i == 0 && (cmd == "$" || cmd == "@") { // root or current + result = append(result, node) + continue + } - case b == bracketOpen: - start := buf.index - buf.step() + var err error + result, err = processCommand(cmd, result) + if err != nil { + return nil, err + } + } - for { - if buf.index >= buf.length || buf.data[buf.index] == bracketClose { - break - } + return result, nil +} - buf.step() - } +// processCommand processes a single command on the given nodes. +// +// It determines the type of command and calls the corresponding function to handle the command. 
+func processCommand(cmd string, nodes []*Node) ([]*Node, error) { + switch { + case cmd == "..": + return processRecursiveDescent(nodes), nil + case cmd == "*": + return processWildcard(nodes), nil + case strings.Contains(cmd, ":"): + return processSlice(cmd, nodes) + case strings.HasPrefix(cmd, "?(") && strings.HasSuffix(cmd, ")"): + panic("filter not implemented") + default: + res, err := processKeyUnion(cmd, nodes) + if err != nil { + return nil, err + } - if buf.index >= buf.length { - return nil, errors.New("unexpected end of path") - } + return res, nil + } +} - segment := string(buf.sliceFromIndices(start+1, buf.index)) - result = append(result, segment) +// processWildcard processes a wildcard command on the given nodes. +// +// It retrieves all the child nodes of each node in the given slice. +func processWildcard(nodes []*Node) (result []*Node) { + for _, node := range nodes { + result = append(result, node.getSortedChildren()...) + } + return result +} - buf.step() +// processRecursiveDescent performs a recursive descent on the given nodes. +// +// It recursively retrieves all the child nodes of each node in the given slice. +func processRecursiveDescent(nodes []*Node) (result []*Node) { + for _, node := range nodes { + result = append(result, recursiveChildren(node)...) + } + return result +} - default: - buf.step() +// recursiveChildren returns all the recursive child nodes of the given node that are containers. +// +// It recursively traverses the child nodes of the given node and their child nodes, +// and returns a slice of pointers to all the child nodes that are containers. +func recursiveChildren(node *Node) (result []*Node) { + if node.isContainer() { + for _, element := range node.getSortedChildren() { + if element.isContainer() { + result = append(result, element) + } } } + temp := make([]*Node, 0, len(result)) + temp = append(temp, result...) + + for _, element := range result { + temp = append(temp, recursiveChildren(element)...) 
+ } + + return temp +} + +var pathSegmentDelimiters = map[byte]bool{dot: true, bracketOpen: true} + +// processDot handles the processing when a dot character is found in the buffer. +// +// It checks the next character in the buffer. +// If the next character is also a dot, it appends ".." to the result slice. +// Otherwise, it reads the characters until the next child end character (childEnd) and appends the substring to the result slice. +// It returns the updated result slice and any error encountered. +func processDot(buf *buffer, result []string) ([]string, error) { + start := buf.index + + b, err := buf.next() + if err == io.EOF { + err = nil + return result, nil + } + + if err != nil { + return nil, err + } + + if b == dot { + result = append(result, "..") + buf.index-- + return result, nil + } + + err = buf.skipAny(pathSegmentDelimiters) + stop := buf.index + + if err == io.EOF { + err = nil + stop = buf.length + } else { + buf.index-- + } + + if err != nil { + return nil, err + } + + if start+1 < stop { + result = append(result, string(buf.data[start+1:stop])) + } + return result, nil } -// extractNextSegment extracts the segment from the current index -// to the next significant character and adds it to the resulting slice. -func extractNextSegment(buf *buffer, result *[]string) { +// processBracketOpen handles the processing when an opening bracket character ('[') is found in the buffer. +// +// It reads the next character in the buffer and determines the appropriate processing based on the character: +// - If the next character is a single quote (`'`), it calls the processSingleQuote function to handle the string within single quotes. +// - Otherwise, it calls the processWithoutSingleQuote function to handle the string without single quotes. +// +// It returns the updated result slice and any error encountered. 
+func processBracketOpen(buf *buffer, result []string) ([]string, error) { + b, err := buf.next() + if err != nil { + return nil, errUnexpectedEOF + } + start := buf.index - buf.skipToNextSignificantToken() + if b == singleQuote { + result, err = processSingleQuote(buf, result, start) + } else { + result, err = processWithoutSingleQuote(buf, result, start) + } + + if err != nil { + return nil, err + } + + return result, nil +} + +// processSingleQuote handles the processing when a single quote character (`'`) is encountered after an opening bracket ('[') in the buffer. +// +// It assumes that the current position of the buffer is just after the single quote character. +// +// The function performs the following steps: +// 1. It skips the single quote character and reads the string until the next single quote character is found. +// 2. It checks if the character after the closing single quote is a closing bracket (']'). +// - If it is, the string between the single quotes is appended to the result slice. +// - If it is not, an error (errBracketNotClosed) is returned. +// +// It returns the updated result slice and any error encountered. +func processSingleQuote(buf *buffer, result []string, start int) ([]string, error) { + start++ + + err := buf.string(singleQuote, true) + if err != nil { + return nil, errStringNotClosed + } + + stop := buf.index + + b, err := buf.next() + if err != nil { + return nil, errUnexpectedEOF + } + + if b != bracketClose { + return nil, errBracketNotClosed + } + + result = append(result, string(buf.data[start:stop])) + + return result, nil +} + +// processWithoutSingleQuote handles the processing when a character other than +// a single quote (`'`) is encountered after an opening bracket ('[') in the buffer. +// +// It assumes that the current position of the buffer is just after the opening bracket. 
+// +// The function reads the characters until the next closing bracket (']') is found +// and appends the substring between the brackets to the result slice. +// +// It returns the updated result slice and any error encountered. +// If the closing bracket is not found, an error (errUnexpectedEOF) is returned. +func processWithoutSingleQuote(buf *buffer, result []string, start int) ([]string, error) { + err := buf.skip(bracketClose) + if err != nil { + return nil, errUnexpectedEOF + } + + stop := buf.index + result = append(result, string(buf.data[start:stop])) - if buf.index <= start { - return + return result, nil +} + +// processSlice processes a slice path on the given nodes. +// +// The slice path has the following syntax: +// +// [start:end:step] +// +// - start: The starting index of the slice (inclusive). if omitted, it defaults to 0. +// If negative, it counts from the end of the array. +// - end: The ending index of the slice (exclusive). if omitted, it defaults to the length of the array. +// If negative, it counts from the end of the array. +// - step: The step value for the slice. if omitted, it defaults to 1. +// +// The function performs the following steps: +// +// 1. Split the slice path into start, end, and step values. +// +// 2. Parses the each syntax components as integers. +// +// 3. For each node in the given nodes: +// - If the node is an array: +// - Calculate the length of the array. +// - Adjust the start and end values if they are negative. +// - Check if the slice range is within the bounds of the array. +// - Iterate over the array elements based on the start, end and step values. +// - Append the selected elements to the result slice. +// +// It returns the slice of selected nodes and any error encountered during the parsing process. 
+func processSlice(cmd string, nodes []*Node) ([]*Node, error) { + from, to, step, err := parseSliceParams(cmd) + if err != nil { + return nil, err + } + + var result []*Node + for _, node := range nodes { + if node.IsArray() { + result = append(result, selectArrayElement(node, from, to, step)...) + } + } + + return result, nil +} + +// parseSliceParams parses the slice parameters from the given path command. +func parseSliceParams(cmd string) (int64, int64, int64, error) { + keys := strings.Split(cmd, ":") + ks := len(keys) + if ks > 3 { + return 0, 0, 0, errInvalidSlicePathSyntax + } + + from, err := strconv.ParseInt(keys[0], 10, 64) + if err != nil { + return 0, 0, 0, errInvalidSliceFromValue + } + + to := int64(0) + if ks > 1 { + to, err = strconv.ParseInt(keys[1], 10, 64) + if err != nil { + return 0, 0, 0, errInvalidSliceToValue + } + } + + step := int64(1) + if ks == 3 { + step, err = strconv.ParseInt(keys[2], 10, 64) + if err != nil { + return 0, 0, 0, errInvalidSliceStepValue + } + } + + return from, to, step, nil +} + +// selectArrayElement selects the array elements based on the given from, to, and step values. +func selectArrayElement(node *Node, from, to, step int64) []*Node { + length := int64(len(node.next)) + + if to == 0 { + to = length + } + + if from < 0 { + from += length + } + + if to < 0 { + to += length + } + + from = int64(math.Max(0, math.Min(float64(from), float64(length)))) + to = int64(math.Max(0, math.Min(float64(to), float64(length)))) + + if step <= 0 || from >= to { + return nil } - segment := string(buf.sliceFromIndices(start, buf.index)) - if segment != "" { - *result = append(*result, segment) + // This formula calculates the number of elements that will be selected based on the given + // from, to, and step values. It ensures that the correct number of elements are allocated + // in the result slice. 
+ size := (to - from + step - 1) / step + result := make([]*Node, 0, size) + + for i := from; i < to; i += step { + if child, ok := node.next[ufmt.Sprintf("%d", i)]; ok { + result = append(result, child) + } + } + + return result +} + +// processKeyUnion processes a key union command on the given nodes. +// +// It retrieves the child nodes of each node in the given slice that match any of the specified keys +func processKeyUnion(cmd string, nodes []*Node) ([]*Node, error) { + buf := newBuffer([]byte(cmd)) + keys, err := extractKeys(buf) + if err != nil { + return nil, err } + + var result []*Node + for _, node := range nodes { + if node.IsArray() { + result, err = processArrayKeys(node, keys, result) + } else if node.IsObject() { + result, err = processObjectKeys(node, keys, result) + } + + if err != nil { + return nil, err + } + } + return result, nil +} + +func extractKeys(buf *buffer) ([]string, error) { + keys := make([]string, 0) + + for { + key, err := extractKey(buf) + if err != nil && err != io.EOF { + return nil, err + } + + keys = append(keys, key) + + if err := expectComma(buf); err != nil { + return keys, nil + } + } + + return keys, nil +} + +func extractKey(buf *buffer) (string, error) { + from := buf.index + if err := buf.pathToken(); err != nil { + return "", err + } + + key := string(buf.data[from:buf.index]) + if len(key) > 2 && key[0] == singleQuote && key[len(key)-1] == singleQuote { + key = key[1 : len(key)-1] + } + return key, nil +} + +func expectComma(buf *buffer) error { + c, err := buf.first() + if err != nil { + return err + } + + if c != comma { + return errUnexpectedChar + } + + return buf.step() +} + +func processArrayKeys(node *Node, keys []string, result []*Node) ([]*Node, error) { + for _, key := range keys { + switch key { + default: + index, err := strconv.Atoi(key) + if err == nil { + if index < 0 { + index = node.Size() + index + } + + if value, ok := node.next[strconv.Itoa(index)]; ok { + result = append(result, value) + } + } 
+ } + } + return result, nil +} + +func processObjectKeys(node *Node, keys []string, result []*Node) ([]*Node, error) { + for _, key := range keys { + if value, ok := node.next[key]; ok { + result = append(result, value) + } + } + + return result, nil +} + +// Paths returns calculated paths of underlying nodes +func Paths(array []*Node) []string { + result := make([]string, 0, len(array)) + for _, element := range array { + result = append(result, element.Path()) + } + + return result } diff --git a/examples/gno.land/p/demo/json/path_test.gno b/examples/gno.land/p/demo/json/path_test.gno index f68e3eb679f..2692c456d55 100644 --- a/examples/gno.land/p/demo/json/path_test.gno +++ b/examples/gno.land/p/demo/json/path_test.gno @@ -1,62 +1,200 @@ package json -import "testing" +import ( + "strings" + "testing" + "unicode" +) func TestParseJSONPath(t *testing.T) { tests := []struct { - name string path string expected []string }{ - {name: "Empty string path", path: "", expected: []string{}}, - {name: "Root only path", path: "$", expected: []string{"$"}}, - {name: "Root with dot path", path: "$.", expected: []string{"$"}}, - {name: "All objects in path", path: "$..", expected: []string{"$", ".."}}, - {name: "Only children in path", path: "$.*", expected: []string{"$", "*"}}, - {name: "All objects' children in path", path: "$..*", expected: []string{"$", "..", "*"}}, - {name: "Simple dot notation path", path: "$.root.element", expected: []string{"$", "root", "element"}}, - {name: "Complex dot notation path with wildcard", path: "$.root.*.element", expected: []string{"$", "root", "*", "element"}}, - {name: "Path with array wildcard", path: "$.phoneNumbers[*].type", expected: []string{"$", "phoneNumbers", "*", "type"}}, - {name: "Path with filter expression", path: "$.store.book[?(@.price < 10)].title", expected: []string{"$", "store", "book", "?(@.price < 10)", "title"}}, - {name: "Path with formula", path: "$..phoneNumbers..('ty' + 'pe')", expected: []string{"$", "..", 
"phoneNumbers", "..", "('ty' + 'pe')"}}, - {name: "Simple bracket notation path", path: "$['root']['element']", expected: []string{"$", "'root'", "'element'"}}, - {name: "Complex bracket notation path with wildcard", path: "$['root'][*]['element']", expected: []string{"$", "'root'", "*", "'element'"}}, - {name: "Bracket notation path with integer index", path: "$['store']['book'][0]['title']", expected: []string{"$", "'store'", "'book'", "0", "'title'"}}, - {name: "Complex path with wildcard in bracket notation", path: "$['root'].*['element']", expected: []string{"$", "'root'", "*", "'element'"}}, - {name: "Mixed notation path with dot after bracket", path: "$.['root'].*.['element']", expected: []string{"$", "'root'", "*", "'element'"}}, - {name: "Mixed notation path with dot before bracket", path: "$['root'].*.['element']", expected: []string{"$", "'root'", "*", "'element'"}}, - {name: "Single character path with root", path: "$.a", expected: []string{"$", "a"}}, - {name: "Multiple characters path with root", path: "$.abc", expected: []string{"$", "abc"}}, - {name: "Multiple segments path with root", path: "$.a.b.c", expected: []string{"$", "a", "b", "c"}}, - {name: "Multiple segments path with wildcard and root", path: "$.a.*.c", expected: []string{"$", "a", "*", "c"}}, - {name: "Multiple segments path with filter and root", path: "$.a[?(@.b == 'c')].d", expected: []string{"$", "a", "?(@.b == 'c')", "d"}}, - {name: "Complex path with multiple filters", path: "$.a[?(@.b == 'c')].d[?(@.e == 'f')].g", expected: []string{"$", "a", "?(@.b == 'c')", "d", "?(@.e == 'f')", "g"}}, - {name: "Complex path with multiple filters and wildcards", path: "$.a[?(@.b == 'c')].*.d[?(@.e == 'f')].g", expected: []string{"$", "a", "?(@.b == 'c')", "*", "d", "?(@.e == 'f')", "g"}}, - {name: "Path with array index and root", path: "$.a[0].b", expected: []string{"$", "a", "0", "b"}}, - {name: "Path with multiple array indices and root", path: "$.a[0].b[1].c", expected: []string{"$", "a", 
"0", "b", "1", "c"}}, - {name: "Path with array index, wildcard and root", path: "$.a[0].*.c", expected: []string{"$", "a", "0", "*", "c"}}, + {path: "$", expected: []string{"$"}}, + {path: "$.", expected: []string{"$"}}, + {path: "$..", expected: []string{"$", ".."}}, + {path: "$.*", expected: []string{"$", "*"}}, + {path: "$..*", expected: []string{"$", "..", "*"}}, + {path: "$.root.element", expected: []string{"$", "root", "element"}}, + {path: "$.root.*.element", expected: []string{"$", "root", "*", "element"}}, + {path: "$['root']['element']", expected: []string{"$", "root", "element"}}, + {path: "$['root'][*]['element']", expected: []string{"$", "root", "*", "element"}}, + {path: "$['store']['book'][0]['title']", expected: []string{"$", "store", "book", "0", "title"}}, + {path: "$['root'].*['element']", expected: []string{"$", "root", "*", "element"}}, + {path: "$.['root'].*.['element']", expected: []string{"$", "root", "*", "element"}}, + {path: "$['root'].*.['element']", expected: []string{"$", "root", "*", "element"}}, + {path: "$.phoneNumbers[*].type", expected: []string{"$", "phoneNumbers", "*", "type"}}, + // TODO: support filter expressions + // {path: "$.store.book[?(@.price < 10)].title", expected: []string{"$", "store", "book", "?(@.price < 10)", "title"}}, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - reult, _ := ParsePath(tt.path) - if !isEqualSlice(reult, tt.expected) { - t.Errorf("ParsePath(%s) expected: %v, got: %v", tt.path, tt.expected, reult) + t.Run(tt.path, func(t *testing.T) { + result, err := parsePath(tt.path) + if err != nil { + t.Errorf("error on path %s: %s", tt.path, err.Error()) + } else if !sliceEqual(result, tt.expected) { + t.Errorf("expected %s, got %s", sliceString(tt.expected), sliceString(result)) } }) } } -func isEqualSlice(a, b []string) bool { +func TestJsonPath(t *testing.T) { + // JSON from: https://support.smartbear.com/alertsite/docs/monitors/api/endpoint/jsonpath.html + data := []byte(`{ + 
"store": { + "book": [ + { + "category": "reference", + "author": "Nigel Rees", + "title": "Sayings of the Century", + "price": 8.95 + }, + { + "category": "fiction", + "author": "Herman Melville", + "title": "Moby Dick", + "isbn": "0-553-21311-3", + "price": 8.99 + }, + { + "category": "fiction", + "author": "J.R.R. Tolkien", + "title": "The Lord of the Rings", + "isbn": "0-395-19395-8", + "price": 22.99 + } + ], + "bicycle": { + "color": "red", + "price": 19.95 + } + }, + "expensive": 10 + }`) + + tests := []struct { + path string + expected []string + }{ + {"$.store.*", []string{ + `{ + "color": "red", + "price": 19.95 +}`, + `[ + { + "category": "reference", + "author": "Nigel Rees", + "title": "Sayings of the Century", + "price": 8.95 + }, + { + "category": "fiction", + "author": "Herman Melville", + "title": "Moby Dick", + "isbn": "0-553-21311-3", + "price": 8.99 + }, + { + "category": "fiction", + "author": "J.R.R. Tolkien", + "title": "The Lord of the Rings", + "isbn": "0-395-19395-8", + "price": 22.99 + } +]`, + }}, + {"$.store.bicycle.color", []string{`"red"`}}, + {"$.store.book[*]", []string{ + `{ + "category": "reference", + "author": "Nigel Rees", + "title": "Sayings of the Century", + "price": 8.95 +}`, + `{ + "category": "fiction", + "author": "Herman Melville", + "title": "Moby Dick", + "isbn": "0-553-21311-3", + "price": 8.99 +}`, + `{ + "category": "fiction", + "author": "J.R.R. 
Tolkien", + "title": "The Lord of the Rings", + "isbn": "0-395-19395-8", + "price": 22.99 +}`, + }}, + {"$.store.book[0].title", []string{`"Sayings of the Century"`}}, + {"$.store..price", []string{`19.95`, `8.95`, `8.99`, `22.99`}}, + {"$..price", []string{`19.95`, `8.95`, `8.99`, `22.99`}}, + {"$..book[*].title", []string{`"Sayings of the Century"`, `"Moby Dick"`, `"The Lord of the Rings"`}}, + {"$..book[0]", []string{ + `{ + "category": "reference", + "author": "Nigel Rees", + "title": "Sayings of the Century", + "price": 8.95 +}`, + }}, + } + + for _, tt := range tests { + result, err := Path(data, tt.path) + if err != nil { + t.Errorf("Unexpected error for path %q: %v", tt.path, err) + continue + } + + if len(result) != len(tt.expected) { + t.Errorf("Path %q: expected %d results, got %d", tt.path, len(tt.expected), len(result)) + continue + } + + for i, node := range result { + expectedNorm := normalizeJSON(tt.expected[i]) + resultNorm := normalizeJSON(node.String()) + if resultNorm != expectedNorm { + t.Errorf("Path %q: expected result %q, got %q", tt.path, expectedNorm, resultNorm) + } + } + } +} + +// normalizeJSON removes all whitespace outside of quoted text. 
// Quoted text is copied verbatim. Inside quotes a backslash escapes the next
// byte, so an escaped quote (\") does not end the quoted text.
func normalizeJSON(s string) string {
	var sb strings.Builder
	inQuotes := false
	escaped := false // true when the previous byte was an unescaped backslash inside quotes
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case escaped:
			// The escaped byte is part of the string, whatever it is.
			escaped = false
			sb.WriteByte(c)
		case inQuotes && c == '\\':
			escaped = true
			sb.WriteByte(c)
		case c == '"':
			inQuotes = !inQuotes
			sb.WriteByte(c)
		case inQuotes || !unicode.IsSpace(rune(c)):
			sb.WriteByte(c)
		}
	}
	return sb.String()
}

// sliceEqual reports whether a and b have the same length and the same
// elements in the same order.
func sliceEqual(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	for i, v := range a {
		if v != b[i] {
			return false
		}
	}
	return true
}

// sliceString formats array as "[e1, e2, ...]" for use in test failure messages.
func sliceString(array []string) string {
	return "[" + strings.Join(array, ", ") + "]"
}

// NOTE: the rest of this patch touches token.gno, where the constant
// `aesterisk` ('*') is renamed to `asterisk`.