From e02ca308f65d1b666ba37f13e3ad08d5ad236d14 Mon Sep 17 00:00:00 2001 From: Benjamin DENEUX Date: Fri, 5 May 2023 15:07:08 +0200 Subject: [PATCH] feat(logic): uri_encoded/3 implement encoding component --- x/logic/predicate/uri.go | 134 +++++++++++++++++++++++++++++++++- x/logic/predicate/uri_test.go | 56 ++++++++++++++ 2 files changed, 187 insertions(+), 3 deletions(-) diff --git a/x/logic/predicate/uri.go b/x/logic/predicate/uri.go index 88cf4359..63dffd49 100644 --- a/x/logic/predicate/uri.go +++ b/x/logic/predicate/uri.go @@ -3,8 +3,10 @@ package predicate import ( "context" "fmt" + "net/url" "github.com/ichiban/prolog/engine" + "github.com/okp4/okp4d/x/logic/util" ) type Component string @@ -16,6 +18,8 @@ const ( SegmentComponent Component = "segment" ) +const upperhex = "0123456789ABCDEF" + func NewComponent(v string) (Component, error) { switch v { case string(QueryComponent): @@ -31,21 +35,145 @@ func NewComponent(v string) (Component, error) { } } +// shouldEscape returns true if the specified character should be escaped when +// appearing in a URL string depending on the targeted URI component, according +// to [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986). +// +// This is a re-implementation of url.shouldEscape of net/url. It is needed because the native implementation doesn't follow +// exactly the [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986) and also because the implementation of component +// escaping is only public for the Path component (which in reality is the path segment component) and the Query component. Otherwise, +// the escaping doesn't match the SWI-Prolog escaping, due to the RFC discrepancy between those two implementations. +// +// Another discrepancy concerns the query component: the space character ' ' is escaped to a '+' (plus sign) by the +// golang library and to '%20' by the SWI-Prolog implementation. +// +// Here are some issues reported on golang about the RFC non-compliance: +// - golang.org/issue/5684. 
+// - https://github.com/golang/go/issues/27559 +func shouldEscape(c byte, comp Component) bool { + // §2.3 Unreserved characters (alphanum) + if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' { + return false + } + + switch c { + case '-', '.', '_', '~': // §2.3 Unreserved characters (mark) + return false + + case '!', '$', '&', '\'', '(', ')', '*', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved) + // Different sections of the URL allow a few of + // the reserved characters to appear unescaped. + switch comp { + case PathComponent: // §3.3 + return c == '?' || c == ':' + + case SegmentComponent: // §3.3 + // The RFC allows : @ & = + $ + // meaning to individual path segments. + return c == '/' || c == '?' || c == ':' + + case QueryComponent: // §3.4 + return c == '&' || c == '+' || c == ':' || c == ';' || c == '=' + case FragmentComponent: // §4.1 + return false + } + } + + // Everything else must be escaped. + return true +} + +// Escape return the given input string by adding percent encoding depending on the current component where it's +// supposed to be put. +// This is a re-implementation of native url.escape. See shouldEscape() comment's for more details. 
+func (comp Component) Escape(v string) string { + hexCount := 0 + for i := 0; i < len(v); i++ { + ch := v[i] + if shouldEscape(ch, comp) { + hexCount++ + } + } + + if hexCount == 0 { + return v + } + + var buf [64]byte + var t []byte + + required := len(v) + 2*hexCount + if required <= len(buf) { + t = buf[:required] + } else { + t = make([]byte, required) + } + + j := 0 + for i := 0; i < len(v); i++ { + switch ch := v[i]; { + case shouldEscape(ch, comp): + t[j] = '%' + t[j+1] = upperhex[ch>>4] + t[j+2] = upperhex[ch&15] + j += 3 + default: + t[j] = v[i] + j++ + } + } + return string(t) +} + +func (comp Component) Decode(v string) (string, error) { + switch comp { + case QueryComponent: + return url.QueryUnescape(v) + case FragmentComponent: + return "", fmt.Errorf("fragment not implemented") + case PathComponent: + return url.PathUnescape(v) + case SegmentComponent: + return "", fmt.Errorf("segment not implemented") + default: + return "", fmt.Errorf("wrong component") + } +} + func URIEncoded(vm *engine.VM, component, decoded, encoded engine.Term, cont engine.Cont, env *engine.Env) *engine.Promise { return engine.Delay(func(ctx context.Context) *engine.Promise { var comp Component switch c := env.Resolve(component).(type) { case engine.Atom: - _, err := NewComponent(c.String()) + cc, err := NewComponent(c.String()) if err != nil { return engine.Error(fmt.Errorf("uri_encoded/3: %w", err)) } + comp = cc default: return engine.Error(fmt.Errorf("uri_encoded/3: invalid component type: %T, should be Atom", component)) } - fmt.Printf("%s", comp) + var dec string + switch d := env.Resolve(decoded).(type) { + case engine.Variable: + case engine.Atom: + dec = comp.Escape(d.String()) + default: + return engine.Error(fmt.Errorf("uri_encoded/3: invalid decoded type: %T, should be Variable or Atom", component)) + } - return engine.Bool(true) + switch e := env.Resolve(encoded).(type) { + case engine.Variable: + return engine.Unify(vm, encoded, util.StringToTerm(dec), cont, 
env) + case engine.Atom: + enc, err := comp.Decode(e.String()) + if err != nil { + return engine.Error(fmt.Errorf("uri_encoded/3: %w", err)) + } + return engine.Unify(vm, decoded, util.StringToTerm(enc), cont, env) + default: + return engine.Error(fmt.Errorf("uri_encoded/3: invalid encpded type: %T, should be Variable or Atom", component)) + } }) } diff --git a/x/logic/predicate/uri_test.go b/x/logic/predicate/uri_test.go index 45783907..5f617d47 100644 --- a/x/logic/predicate/uri_test.go +++ b/x/logic/predicate/uri_test.go @@ -30,6 +30,62 @@ func TestURIEncoded(t *testing.T) { wantSuccess: false, wantError: fmt.Errorf("uri_encoded/3: invalid component name hey, expected `query`, `fragment`, `path` or `segment`"), }, + { + query: `uri_encoded(path, Decoded, foo).`, + wantSuccess: true, + wantResult: []types.TermResults{{ + "Decoded": "foo", + }}, + }, + { + query: `uri_encoded(path, Decoded, 'foo%20bar').`, + wantSuccess: true, + wantResult: []types.TermResults{{ + "Decoded": "'foo bar'", + }}, + }, + { + query: `uri_encoded(path, foo, Encoded).`, + wantSuccess: true, + wantResult: []types.TermResults{{ + "Encoded": "foo", + }}, + }, + { + query: `uri_encoded(query, 'foo bar', Encoded).`, + wantSuccess: true, + wantResult: []types.TermResults{{ + "Encoded": "'foo%20bar'", + }}, + }, + { + query: "uri_encoded(query, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).", + wantSuccess: true, + wantResult: []types.TermResults{{ + "Encoded": "'%20!%22%23$%25%26\\'()*%2B,-./0123456789%3A%3B%3C%3D%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'", + }}, + }, + { + query: "uri_encoded(path, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).", + wantSuccess: true, + wantResult: []types.TermResults{{ + "Encoded": 
"'%20!%22%23$%25&\\'()*+,-./0123456789%3A;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'", + }}, + }, + { + query: "uri_encoded(segment, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).", + wantSuccess: true, + wantResult: []types.TermResults{{ + "Encoded": "'%20!%22%23$%25&\\'()*+,-.%2F0123456789%3A;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'", + }}, + }, + { + query: "uri_encoded(fragment, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).", + wantSuccess: true, + wantResult: []types.TermResults{{ + "Encoded": "'%20!%22%23$%25&\\'()*+,-./0123456789:;%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'", + }}, + }, } for nc, tc := range cases { Convey(fmt.Sprintf("Given the query #%d: %s", nc, tc.query), func() {