Skip to content

Commit

Permalink
YAML encoder (#241)
Browse files Browse the repository at this point in the history
* Includes YAML encoding support

Note: This change bumps the gopkg.in/yaml library from v2 to v3
for the purpose of supporting custom indentation.

* Fixes invalid Markdown table definitions in encoding documentation

* Tidies code and fixes linting issues

* Regenerates stdlib

* Regen

Co-authored-by: Oliver Lade <oliver.lade@anz.com>
  • Loading branch information
andrewemeryanz and orlade-anz authored Dec 9, 2021
1 parent da7855a commit e04d6d5
Show file tree
Hide file tree
Showing 21 changed files with 191 additions and 65 deletions.
1 change: 1 addition & 0 deletions cmd/arrai/sync_darwin.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//go:build darwin
// +build darwin

package main
Expand Down
1 change: 1 addition & 0 deletions cmd/arrai/sync_other.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//go:build !darwin
// +build !darwin

package main
Expand Down
64 changes: 40 additions & 24 deletions docs/docs/std/encoding.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,21 @@ For details of how Arr.ai encodes XML, see [Encoding](#Encoding) below.
Usage:

| example | equals |
|:-|:-|
|:---|:---|
| `//encoding.xml.decode('<?xml version="1.0"?><root></root>')` | `[(xmldecl: 'version="1.0"'), (elem: 'root')]` |

## `//encoding.xml.decoder(config <: (:trimSurroundingWhitespace <: bool)).decode(xml <: string|bytes) <: array`

`decoder` takes a tuple used to configure decoding and returns the decoding function:

| config | description |
|:-|:-|
|:---|:---|
| `trimSurroundingWhitespace` | Strips newline strings `'\n'` used only for xml file formatting |

Usage:

| example | equals |
|:-|:-|
|:---|:---|
| `//encoding.xml.decoder((trimSurroundingWhitespace: true)).decode('<?xml version="1.0"?>\n')` | `[(xmldecl: 'version="1.0"')]` |
| `//encoding.xml.decoder((trimSurroundingWhitespace: false)).decode('<?xml version="1.0"?>\n')` | `[(xmldecl: 'version="1.0"'), '\n']` |

Expand All @@ -38,7 +39,7 @@ For details of the limitations of XML encoding, see [Limitations](#Limitations)
Usage:

| example | equals |
|:-|:-|
|:---|:---|
| `//encoding.xml.encode([(xmldecl: 'version="1.0"')])` | `<?xml version="1.0"?>` |

## `//encoding.csv.decode(csv <: string|bytes) <: array`
Expand All @@ -48,14 +49,15 @@ Usage:
Usage:

| example | equals |
|:-|:-|
|:---|:---|
| `//encoding.csv.decode('a,b,c\n1,2,3')` | `[['a', 'b', 'c'], ['1', '2', '3']]` |

## `//encoding.csv.decoder(config <: (comma <: int, comment <: int)) <: ((csv <: string|bytes) <: array)`

`decoder` takes a tuple used to configure decoding and returns the decoding function.

| config | description |
|:-|:-|
|:---|:---|
| `comma` | Configures the separator used (defaults to `%,`). |
| `comment` | Ignores lines from the input that start with the given character (defaults to regarding all lines as value input). |
| `trimLeadingSpace` | Leading white space in a field is ignored. This is ignored even if the field delimiter, comma, is white space. |
Expand All @@ -65,7 +67,7 @@ Usage:
Usage:

| example | equals |
|:-|:-|
|:---|:---|
| `//encoding.csv.decoder((comma: %:))('a:b:c\n1:2:3')` | `[['a', 'b', 'c'], ['1', '2', '3']]` |
| `//encoding.csv.decoder((comment: %#))('a,b,c\n#1,2,3')` | `[['a', 'b', 'c']]` |

Expand All @@ -76,21 +78,22 @@ Usage:
Usage:

| example | equals |
|:-|:-|
|:---|:---|
| `//encoding.csv.encode([['a', 'b', 'c'], ['1', '2', '3']])` | `<<'a,b,c\n1,2,3'>>` |

## `//encoding.csv.encoder(config <: (comma <: int, crlf <: bool)) <: (\(csv <: array) <: bytes)`

`encoder` takes a tuple used to configure encoding and returns the encoding function:

| config | description |
|:-|:-|
|:---|:---|
| `comma` | Configures the separator used (defaults to `%,`). |
| `crlf` | Encodes new lines as either `'\r\n'` when `true` or `'\n'` when `false` (defaults to `false`). |

Usage:

| example | equals |
|:-|:-|
|:---|:---|
| `//encoding.csv.encoder((comma: %:))([['a', 'b', 'c'], ['1', '2', '3']])` | `<<'a:b:c\n1:2:3'>>` |
| `//encoding.csv.encoder((crlf: true))([['a', 'b', 'c'], ['1', '2', '3']])` | `<<'a,b,c\r\n1,2,3'>>` |

Expand All @@ -103,7 +106,7 @@ Because empty sets are indistinguishable to `""`, `false`, and `[]`, `decode`
maps incoming JSON values as follows:

| JSON encoding | maps to&hellip; | notes |
|:-|:-|:-|
|:---|:---|:---|
| `"abc"` | `(s: "abc")` |
| `[1, 2, 3]` | `(a: [1, 2, 3])` |
| `false`/`true` | `(b: false)`/`(b: true)` |
Expand All @@ -114,47 +117,47 @@ maps incoming JSON values as follows:
Usage:

| example | equals |
|:-|:-|
|:---|:---|
| `//encoding.json.decode('{"hi": "abc", "hello": 123}')` | `{'hello': 123, 'hi': (s: 'abc')}` |

## `//encoding.json.decoder(config <: (strict <: bool)) <: ((json <: string|bytes) <: array)`

`decoder` takes a tuple used to configure decoding and returns the decoding function:

| config | description |
|:-|:-|
|:---|:---|
| `strict` | For types that are indistinguishable when empty (strings, bools, and arrays), wrap values in tuples with a discriminating key (defaults to `true`). |

Usage:

| example | equals |
|:-|:-|
|:---|:---|
| `//encoding.json.decoder(())('{"arr": [1], "null": null, "str": "2"}')` | `<<'{'arr': (a: [1]), 'null': (), 'str': (s: '2')}'>>` |
| `//encoding.json.decoder((strict: false))('{"arr": [1], "null": null, "str": "2"}')` | `<<'{"arr": [1], "null": (), "str": "2"}'>>` |

## `//encoding.json.encode(jsonDefinition <: set) <: string|bytes`
## `//encoding.json.encode(jsonDefinition <: set) <: bytes`

`encode` is the reverse of `decode`. It converts a built-in arr.ai value to `bytes` that represent a JSON object.

Usage:

| example | equals |
|:-|:-|
|:---|:---|
| `//encoding.json.encode({'hello': 123, 'hi': (s: 'abc'), 'yo': (a: [1,2,3])})` | `'{"hello":123,"hi":"abc","yo":[1,2,3]}'` |

## `//encoding.json.encode_indent(jsonDefinition <: set) <: string|bytes`
## `//encoding.json.encode_indent(jsonDefinition <: set) <: bytes`

`encode_indent` is like `encode` but applies indentations to format the output.

## `//encoding.json.encoder(config <: (strict <: bool, prefix <: string, indent: string, escapeHTML <: bool)) <: ((json <: string|bytes) <: array)`
## `//encoding.json.encoder(config <: (strict <: bool, prefix <: string, indent: string, escapeHTML <: bool)) <: ((jsonDefinition <: set) <: bytes)`

`encoder` takes a tuple used to configure encoding and returns the encoding function:

| config | description |
|:-|:-|
| prefix | The string to prepend to each line of encoded output (default `""`). |
| indent | The string to use for each indent on each line of encoded output (default `""`).<br/>If empty, the output will be encoded on a single line. |
| escapeHTML | Whether [problematic HTML characters](https://pkg.go.dev/encoding/json#Encoder.SetEscapeHTML) should be escaped inside JSON quoted strings (default `false`). |
|:---|:---|
| `prefix` | The string to prepend to each line of encoded output (default `""`). |
| `indent` | The string to use for each indent on each line of encoded output (default `""`).<br/>If empty, the output will be encoded on a single line. |
| `escapeHTML` | Whether [problematic HTML characters](https://pkg.go.dev/encoding/json#Encoder.SetEscapeHTML) should be escaped inside JSON quoted strings (default `false`). |
| `strict` | For types that are indistinguishable when empty (strings, bools, and arrays), require values to be wrapped in tuples with a discriminating key (defaults to `true`).<br/>If `false`, all empty sets will be encoded as `null`. |

Example:
Expand All @@ -178,7 +181,20 @@ Example:

## `//encoding.yaml.decode(yaml <: string|bytes) <: set`

Exactly the same as `//encoding.json.decode`, but takes either a `string` or `bytes` that represents a YAML object.
Exactly the same as `//encoding.json.decode` but takes either a `string` or `bytes` that represents a YAML object.

## `//encoding.yaml.encode(yamlDefinition <: set) <: bytes`

Exactly the same as `//encoding.json.encode` but returns `bytes` that represents a YAML object.

## `//encoding.yaml.encoder(config <: (strict <: bool, indent <: int)) <: ((yamlDefinition <: set) <: bytes)`

`encoder` takes a tuple used to configure encoding and returns the encoding function:

| config | description |
|:---|:---|
| `indent` | The number of spaces to indent sections with (default `4`). |
| `strict` | For types that are indistinguishable when empty (strings, bools, and arrays), require values to be wrapped in tuples with a discriminating key (defaults to `true`).<br/>If `false`, all empty sets will be encoded as `null`. |

## `//encoding.proto.descriptor(protobufDefinition <: bytes) <: tuple`

Expand Down Expand Up @@ -241,7 +257,7 @@ Note that unlike standard `decode` functions, this is not reversible; its output
### Encoding

| Description | XML Encoding | Arr.ai Encoding |
|:-|:-|:-|
|:---|:---|:---|
| Declaration | `<?xml version="1.0"?>` | `[(xmldecl: 'version="1.0"')]` |
| Directive |`<!DOCTYPE foo <!ELEMENT foo (#PCDATA)>>` | `(directive: 'DOCTYPE foo <!ELEMENT foo (#PCDATA)>')` |
| Text | `Hello world` | `'Hello world'` |
Expand Down
3 changes: 1 addition & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,5 @@ require (
google.golang.org/genproto v0.0.0-20210608205507-b6d2f5bf0d7d // indirect
google.golang.org/grpc v1.38.0
google.golang.org/protobuf v1.26.0
gopkg.in/yaml.v2 v2.3.0
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b
)
2 changes: 0 additions & 2 deletions go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pkg/importcache/import_cache_test.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//go:build timingsensitive
// +build timingsensitive

package importcache
Expand Down
3 changes: 2 additions & 1 deletion pkg/shell/shell.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
//+build !wasm
//go:build !wasm
// +build !wasm

package shell

Expand Down
3 changes: 2 additions & 1 deletion pkg/shell/shell_cmd.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
//+build !wasm
//go:build !wasm
// +build !wasm

package shell

Expand Down
3 changes: 2 additions & 1 deletion pkg/shell/shell_completion.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
//+build !wasm
//go:build !wasm
// +build !wasm

package shell

Expand Down
3 changes: 2 additions & 1 deletion pkg/shell/shell_wasm.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
//+build wasm
//go:build wasm
// +build wasm

package shell

Expand Down
1 change: 1 addition & 0 deletions pkg/test/runner_test.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//go:build !windows
// +build !windows

package test
Expand Down
Binary file modified syntax/embed/stdlib-safe.arraiz
Binary file not shown.
Binary file modified syntax/embed/stdlib-unsafe.arraiz
Binary file not shown.
1 change: 1 addition & 0 deletions syntax/std_encoding_json.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ func stdEncodingJSON() rel.Attr {
return jsonDecodeFnBody("json.decode", value, newJSONDecodeConfig())
}),

//nolint:dupl // Not a duplicate of JSON encoder
rel.NewNativeFunctionAttr(decoderAttr, func(_ context.Context, configValue rel.Value) (rel.Value, error) {
fn := "json.decoder"
config := newJSONDecodeConfig()
Expand Down
1 change: 0 additions & 1 deletion syntax/std_encoding_json_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ func TestJSONDecode_NonStrict(t *testing.T) {
AssertCodesEvalToSameValue(t, "{}", `//encoding.json.decoder((strict: false))('[]')`)
AssertCodeErrors(t, "", `//encoding.json.decoder((strict: false))(123)`)
AssertCodesEvalToSameValue(t, "123", `//encoding.json.decoder((strict: false))('123')`)
AssertCodesEvalToSameValue(t, "123", `//encoding.json.decoder((strict: false))('123')`)

json := testJSONString()
expected := testArraiStringLoose()
Expand Down
57 changes: 57 additions & 0 deletions syntax/std_encoding_yaml.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
package syntax

import (
"bytes"
"context"

"gopkg.in/yaml.v3"

"github.com/go-errors/errors"

"github.com/arr-ai/arrai/rel"
Expand All @@ -13,17 +16,27 @@ func newYAMLDecodeConfig() yamlDecodeConfig {
return yamlDecodeConfig{strict: true}
}

// newYAMLEncodeConfig returns the encode settings used when the caller
// supplies no overrides: strict mode enabled and an indent of 4.
func newYAMLEncodeConfig() yamlEncodeConfig {
	var defaults yamlEncodeConfig
	defaults.strict = true
	defaults.indent = 4
	return defaults
}

type (
	// yamlDecodeConfig holds the options that control YAML decoding.
	yamlDecodeConfig struct {
		// strict is the decoder's strictness flag.
		strict bool
	}

	// yamlEncodeConfig holds the options that control YAML encoding.
	yamlEncodeConfig struct {
		// strict is handed to the translator that converts the arr.ai value
		// before it is encoded.
		strict bool
		// indent is the indentation width passed to the YAML encoder.
		indent int
	}
)

func stdEncodingYAML() rel.Attr {
return rel.NewTupleAttr(
"yaml",
rel.NewNativeFunctionAttr(decodeAttr, func(_ context.Context, value rel.Value) (rel.Value, error) {
return yamlDecodeFnBody("yaml.decode", value, newYAMLDecodeConfig())
}),

//nolint:dupl // Not a duplicate of YAML encoder
rel.NewNativeFunctionAttr(decoderAttr, func(_ context.Context, configValue rel.Value) (rel.Value, error) {
fn := "yaml.decoder"
config := newYAMLDecodeConfig()
Expand All @@ -41,6 +54,34 @@ func stdEncodingYAML() rel.Attr {
return yamlDecodeFnBody("yaml.decoder payload", value, config)
}), nil
}),

rel.NewNativeFunctionAttr("encode", func(_ context.Context, value rel.Value) (rel.Value, error) {
return yamlEncodeFnBody(value, newYAMLEncodeConfig())
}),

rel.NewNativeFunctionAttr("encoder", func(_ context.Context, configValue rel.Value) (rel.Value, error) {
fn := "yaml.encoder"
config := newYAMLEncodeConfig()

configTuple, ok := configValue.(rel.Tuple)
if !ok {
return nil, errors.Errorf("first arg to %s must be tuple, not %s", fn, rel.ValueTypeAsString(configValue))
}

indent, err := getConfigInt(configTuple, fn, "indent", config.indent)
if err != nil {
return nil, err
}
config.indent = indent

if strict, ok := getConfigBool(configTuple, "strict"); ok {
config.strict = strict
}

return rel.NewNativeFunction("encode", func(_ context.Context, value rel.Value) (rel.Value, error) {
return yamlEncodeFnBody(value, config)
}), nil
}),
)
}

Expand All @@ -55,3 +96,19 @@ func yamlDecodeFnBody(fn string, value rel.Value, config yamlDecodeConfig) (rel.
}
return val, nil
}

// yamlEncodeFnBody encodes an arr.ai value as YAML and returns the result as
// an arr.ai bytes value.
//
// value is first run through a translator built with config.strict; the
// translated data is then written by a YAML encoder that indents with
// config.indent.
//
// An error is returned when config.indent is negative, when the translation
// fails, or when the YAML encoder reports a failure.
func yamlEncodeFnBody(value rel.Value, config yamlEncodeConfig) (rel.Value, error) {
	// yaml.Encoder.SetIndent panics when given a negative count. The indent is
	// user-supplied configuration, so reject it here with an ordinary error
	// rather than letting the panic escape.
	if config.indent < 0 {
		return nil, errors.Errorf("yaml encode: indent must not be negative, got %d", config.indent)
	}

	data, err := translate.NewTranslator(config.strict).FromArrai(value)
	if err != nil {
		return nil, err
	}

	var buf bytes.Buffer
	enc := yaml.NewEncoder(&buf)
	enc.SetIndent(config.indent)
	if err := enc.Encode(data); err != nil {
		return nil, err
	}
	// Close writes out anything the encoder still holds, so the buffer is
	// complete before its contents are read.
	if err := enc.Close(); err != nil {
		return nil, err
	}
	return rel.NewBytes(buf.Bytes()), nil
}
Loading

0 comments on commit e04d6d5

Please sign in to comment.