Skip to content

Commit

Permalink
feat(logic): add string_bytes/3 predicate
Browse files Browse the repository at this point in the history
  • Loading branch information
ccamel committed Jan 6, 2024
1 parent 1e262b0 commit 2e2bac2
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 11 deletions.
2 changes: 1 addition & 1 deletion x/logic/predicate/address.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func Bech32Address(vm *engine.VM, address, bech32 engine.Term, cont engine.Cont,
if err != nil {
return engine.Error(fmt.Errorf("bech32_address/2: failed to decode Bech32: %w", err))
}
pair := AtomPair.Apply(util.StringToTerm(h), util.BytesToStringTermDefault(a))
pair := AtomPair.Apply(util.StringToTerm(h), util.BytesToCodepointListTermWithDefault(a))
return engine.Unify(vm, address, pair, cont, env)
default:
return engine.Error(fmt.Errorf("bech32_address/2: invalid Bech32 type: %T, should be Atom or Variable", b))
Expand Down
2 changes: 1 addition & 1 deletion x/logic/predicate/crypto.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func CryptoDataHash(
return engine.Error(fmt.Errorf("%s: failed to hash data: %w", functor, err))
}

return engine.Unify(vm, hash, util.BytesToStringTermDefault(result), cont, env)
return engine.Unify(vm, hash, util.BytesToCodepointListTermWithDefault(result), cont, env)
})
}

Expand Down
2 changes: 1 addition & 1 deletion x/logic/predicate/encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func HexBytes(vm *engine.VM, hexa, bts engine.Term, cont engine.Cont, env *engin
if result == nil {
return engine.Error(fmt.Errorf("hex_bytes/2: nil hexadecimal conversion in input"))
}
return engine.Unify(vm, bts, util.BytesToStringTermDefault(result), cont, env)
return engine.Unify(vm, bts, util.BytesToCodepointListTermWithDefault(result), cont, env)
case engine.Compound:
src, err := util.StringTermToBytes(b, "", env)
if err != nil {
Expand Down
1 change: 1 addition & 0 deletions x/logic/predicate/encoding_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ func TestHexBytesPredicate(t *testing.T) {
Convey("and a vm", func() {
interpreter := testutil.NewLightInterpreterMust(ctx)
interpreter.Register2(engine.NewAtom("hex_bytes"), HexBytes)
interpreter.Register3(engine.NewAtom("string_bytes"), StringBytes)

err := interpreter.Compile(ctx, tc.program)
So(err, ShouldBeNil)
Expand Down
64 changes: 64 additions & 0 deletions x/logic/predicate/string.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,67 @@ func ReadString(vm *engine.VM, stream, length, result engine.Term, cont engine.C
util.Tuple(util.StringToTerm(builder.String()), engine.Integer(totalLen)), cont, env)
})
}

// StringBytes is a predicate that relates a (Unicode) string to a list of bytes: it succeeds when String is
// represented by Bytes in the given Encoding.
//
// The signature is as follows:
//
//	string_bytes(?String, ?Bytes, +Encoding)
//
// Where:
//   - String is the string to convert to bytes. It can be an Atom, string or list of character codes.
//   - Bytes is the list of numbers between 0 and 255 that represent the sequence of bytes.
//   - Encoding is the encoding to use for the conversion.
//
// Encoding can be one of the following:
//   - 'text' considers the string as a sequence of Unicode characters.
//   - 'octet' considers the string as a sequence of bytes.
//   - 'utf8' considers the string as a sequence of UTF-8 characters.
//   - '<encoding>' considers the string as a sequence of characters in the given encoding.
//
// At least one of String or Bytes must be instantiated. When String is not instantiated, Bytes must be a list,
// otherwise a type error is raised.
//
// Examples:
//
//	# Convert a string to a list of bytes.
//	- string_bytes('Hello World', Bytes, octet).
//
//	# Convert a list of bytes to a string.
//	- string_bytes(String, [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100], octet).
func StringBytes(
	vm *engine.VM, str, bts, encoding engine.Term, cont engine.Cont, env *engine.Env,
) *engine.Promise {
	// wrap prefixes an error with the predicate indicator so that callers know where it originates from.
	wrap := func(cause error) error {
		return fmt.Errorf("string_bytes/3: %w", cause)
	}

	return engine.Delay(func(_ context.Context) *engine.Promise {
		encodingAtom, err := util.AssertAtom(env, encoding)
		if err != nil {
			return engine.Error(wrap(err))
		}

		// String -> Bytes: encode the string with the requested encoding, then expose the raw bytes as a list of codes.
		stringToBytes := func(args []engine.Term, _ engine.Term, e *engine.Env) ([]engine.Term, error) {
			raw, convErr := util.StringTermToBytes(args[0], encodingAtom.String(), e)
			if convErr != nil {
				return nil, wrap(convErr)
			}
			codes, convErr := util.BytesToCodepointListTerm(raw, "text")
			if convErr != nil {
				return nil, wrap(convErr)
			}
			return []engine.Term{codes}, nil
		}

		// Bytes -> String: read the list as raw bytes, then decode them with the requested encoding.
		bytesToString := func(args []engine.Term, _ engine.Term, e *engine.Env) ([]engine.Term, error) {
			byteList := args[0]
			if !util.IsList(byteList) {
				// The type error is returned as is, without the predicate prefix.
				return nil, engine.TypeError(engine.NewAtom("list"), byteList, e)
			}
			raw, convErr := util.StringTermToBytes(byteList, "text", e)
			if convErr != nil {
				return nil, wrap(convErr)
			}
			chars, convErr := util.BytesToAtomListTerm(raw, encodingAtom.String())
			if convErr != nil {
				return nil, wrap(convErr)
			}
			return []engine.Term{chars}, nil
		}

		return util.UnifyFunctional(
			vm,
			[]engine.Term{str},
			[]engine.Term{bts},
			encoding,
			stringToBytes,
			bytesToString,
			cont,
			env,
		)
	})
}
130 changes: 122 additions & 8 deletions x/logic/util/prolog.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@ package util

import (
"bytes"
"context"
"encoding/hex"
"fmt"
"strings"
"unicode/utf8"

"github.com/ichiban/prolog/engine"
"github.com/samber/lo"
"golang.org/x/net/html/charset"
)

Expand Down Expand Up @@ -41,9 +43,8 @@ func StringToTerm(s string) engine.Term {
return engine.NewAtom(s)
}

// BytesToStringTerm try to convert a given golang []byte into a string term.
// Function is the reverse of StringTermToBytes.
func BytesToStringTerm(in []byte, encoding string) (engine.Term, error) {
// BytesToCodepointListTerm tries to convert a given golang []byte into a list of codepoints.
func BytesToCodepointListTerm(in []byte, encoding string) (engine.Term, error) {
out, err := decode(in, encoding)
if err != nil {
return nil, err
Expand All @@ -56,16 +57,30 @@ func BytesToStringTerm(in []byte, encoding string) (engine.Term, error) {
return engine.List(terms...), nil
}

// BytesToStringTermDefault is like the BytesToStringTerm function but with a default encoding.
// BytesToCodepointListTermWithDefault is like the BytesToCodepointListTerm function but with a default encoding.
// This function panics if the conversion fails, which can't happen with the default encoding.
func BytesToStringTermDefault(in []byte) engine.Term {
term, err := BytesToStringTerm(in, "")
func BytesToCodepointListTermWithDefault(in []byte) engine.Term {
term, err := BytesToCodepointListTerm(in, "")
if err != nil {
panic(err)
}
return term
}

// BytesToAtomListTerm tries to convert a given golang []byte into a list of atoms, one atom per character.
// The bytes are first decoded with the given encoding; the decoding error, if any, is returned unchanged.
func BytesToAtomListTerm(in []byte, encoding string) (engine.Term, error) {
	decoded, err := decode(in, encoding)
	if err != nil {
		return nil, err
	}

	// Work on runes so that each (possibly multi-byte) character ends up in its own atom.
	runes := []rune(string(decoded))
	atoms := make([]engine.Term, len(runes))
	for i, r := range runes {
		atoms[i] = engine.NewAtom(string(r))
	}

	return engine.List(atoms...), nil
}

// StringTermToBytes try to convert a given string into native golang []byte.
// String is an instantiated term which represents text as an atom, string, list of character codes or list of characters.
// Encoding is the supported encoding type:
Expand Down Expand Up @@ -241,6 +256,31 @@ func IsCompound(term engine.Term) bool {
return ok
}

// IsFullyInstantiated reports whether the given term, once resolved in env, contains no variable.
// A variable is never fully instantiated, a compound is fully instantiated when all of its arguments are
// (checked recursively), and any other term is considered fully instantiated.
func IsFullyInstantiated(term engine.Term, env *engine.Env) bool {
	resolved := env.Resolve(term)

	if _, isVar := resolved.(engine.Variable); isVar {
		return false
	}

	compound, isCompound := resolved.(engine.Compound)
	if !isCompound {
		return true
	}

	for idx := 0; idx < compound.Arity(); idx++ {
		if !IsFullyInstantiated(compound.Arg(idx), env) {
			return false
		}
	}
	return true
}

// AreFullyInstantiated reports whether every term of the given list is fully instantiated, i.e. whether none of
// them contains a variable once resolved in env (see IsFullyInstantiated).
// An empty list is considered fully instantiated.
func AreFullyInstantiated(terms []engine.Term, env *engine.Env) bool {
	// All the terms must qualify: a single term holding a variable makes the whole list not fully instantiated.
	return lo.EveryBy(terms, func(t engine.Term) bool {
		return IsFullyInstantiated(t, env)
	})
}

// AssertAtom resolves a term and attempts to convert it into an engine.Atom if possible.
// If conversion fails, the function returns the empty atom and the error.
func AssertAtom(env *engine.Env, t engine.Term) (engine.Atom, error) {
Expand Down Expand Up @@ -320,9 +360,9 @@ func GetOptionWithDefault(
// GetOptionAsAtomWithDefault is a helper function that returns the value of the first option with the given name in the
// given options.
func GetOptionAsAtomWithDefault(
algorithmOpt engine.Atom, options engine.Term, defaultValue engine.Term, env *engine.Env,
name engine.Atom, options engine.Term, defaultValue engine.Term, env *engine.Env,
) (engine.Atom, error) {
term, err := GetOptionWithDefault(algorithmOpt, options, defaultValue, env)
term, err := GetOptionWithDefault(name, options, defaultValue, env)
if err != nil {
return AtomEmpty, err
}
Expand All @@ -333,3 +373,77 @@ func GetOptionAsAtomWithDefault(

return atom, nil
}

// ConvertFunc is the signature of the conversion functions used by UnifyFunctional: it maps a list of terms
// (the domain) to another list of terms (the codomain), or returns an error when the conversion is not possible.
// The domain and the codomain can have different cardinalities.
// options carries the options that can be used to parameterize the conversion.
// The terms provided are expected to be fully instantiated (i.e. no variables).
type ConvertFunc func(value []engine.Term, options engine.Term, env *engine.Env) ([]engine.Term, error)

// UnifyFunctional is a generic unification helper which unifies a list of input terms with a list of output
// terms through the given conversion functions, preserving the functional relationship between both sides.
//
// It is meant to simplify the implementation of the many predicates which are essentially functional, like hash
// functions, encoding functions, etc.
//
// The semantics of the unification are as follows:
//  1. the instantiation state of the input and of the output is determined;
//  2. if there are variables in both the input and the output,
//     the conversion is not possible and an instantiation error is returned;
//  3. if there are no variables in the input,
//     the input is converted with forwardConverter and the result is unified with the output;
//  4. otherwise (no variables in the output only),
//     the output is converted with backwardConverter and the result is unified with the input.
//
// The following table summarizes the behavior, where:
//   - fi = fully instantiated (i.e. no variables)
//   - !fi = not fully instantiated (i.e. at least one variable)
//
// | input | output | result                               |
// |-------|--------|--------------------------------------|
// | !fi   | !fi    | error: not sufficiently instantiated |
// | fi    | !fi    | unify(forward(input), output)        |
// | fi    | fi     | unify(forward(input), output)        |
// | !fi   | fi     | unify(input, backward(output))       |
//
// The conversion functions may return an error when the conversion fails or is infeasible by nature, for instance
// when the relationship has no one-to-one correspondence (e.g. hash functions). Such an error is returned as is.
func UnifyFunctional(
	vm *engine.VM,
	in,
	out []engine.Term,
	options engine.Term,
	forwardConverter ConvertFunc,
	backwardConverter ConvertFunc,
	cont engine.Cont,
	env *engine.Env,
) *engine.Promise {
	return engine.Delay(func(_ context.Context) *engine.Promise {
		inputReady := AreFullyInstantiated(in, env)
		outputReady := AreFullyInstantiated(out, env)
		if !(inputReady || outputReady) {
			return engine.Error(engine.InstantiationError(env))
		}

		// Only one side gets converted; the other one is unified untouched.
		lhs, rhs := in, out
		var convErr error
		if inputReady {
			// The forward direction takes precedence, including when both sides are instantiated.
			lhs, convErr = forwardConverter(in, options, env)
		} else {
			rhs, convErr = backwardConverter(out, options, env)
		}
		if convErr != nil {
			return engine.Error(convErr)
		}

		return engine.Unify(vm, Tuple(lhs...), Tuple(rhs...), cont, env)
	})
}

0 comments on commit 2e2bac2

Please sign in to comment.