Skip to content

Commit

Permalink
feat(logic): uri_encoded/3 implement encoding component
Browse files Browse the repository at this point in the history
  • Loading branch information
bdeneux committed May 5, 2023
1 parent 276bf77 commit e02ca30
Show file tree
Hide file tree
Showing 2 changed files with 187 additions and 3 deletions.
134 changes: 131 additions & 3 deletions x/logic/predicate/uri.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ package predicate
import (
"context"
"fmt"
"net/url"

"github.com/ichiban/prolog/engine"
"github.com/okp4/okp4d/x/logic/util"
)

// Component names the part of a URI (query, fragment, path or segment) whose
// escaping rules apply when a value is encoded or decoded.
type Component string
Expand All @@ -16,6 +18,8 @@ const (
SegmentComponent Component = "segment"
)

// upperhex maps a 4-bit value (0-15) to its uppercase hexadecimal digit. It is
// indexed with the high and low nibble of a byte when producing "%XX" escapes.
const upperhex = "0123456789ABCDEF"

func NewComponent(v string) (Component, error) {
switch v {
case string(QueryComponent):
Expand All @@ -31,21 +35,145 @@ func NewComponent(v string) (Component, error) {
}
}

// shouldEscape reports whether the byte c has to be percent-encoded when it
// appears in the URI component comp, following
// [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986).
//
// It is a re-implementation of the unexported url.shouldEscape of net/url. The
// native implementation cannot be reused because it does not follow RFC 3986
// exactly, and because component escaping is only exposed publicly for the
// Path component (which in reality behaves like a path segment) and the Query
// component. Its output therefore does not match what SWI-Prolog produces.
//
// One more difference concerns the query component: the Go library turns the
// space character ' ' into '+', whereas SWI-Prolog emits '%20'.
//
// Issues reported against Go about the RFC non-compliance:
//   - golang.org/issue/5684.
//   - https://github.com/golang/go/issues/27559
func shouldEscape(c byte, comp Component) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
		// §2.3 Unreserved characters (alphanum): never escaped.
		return false
	case c == '-', c == '.', c == '_', c == '~':
		// §2.3 Unreserved characters (mark): never escaped.
		return false
	}

	// §2.2 Reserved characters. Anything outside this set (and outside the
	// unreserved set handled above) is always escaped.
	const reserved = "!$&'()*+,/:;=?@"
	isReserved := false
	for i := 0; i < len(reserved); i++ {
		if reserved[i] == c {
			isReserved = true
			break
		}
	}
	if !isReserved {
		return true
	}

	// Each component lets a different subset of the reserved characters
	// through unescaped.
	switch comp {
	case PathComponent: // §3.3
		return c == '?' || c == ':'
	case SegmentComponent: // §3.3
		// Same as a path, except that the segment separator '/' is escaped too.
		return c == '/' || c == '?' || c == ':'
	case QueryComponent: // §3.4
		switch c {
		case '&', '+', ':', ';', '=':
			return true
		}
		return false
	case FragmentComponent: // §4.1
		return false
	}

	// Reserved character in a component that is not one of the four above:
	// escape it.
	return true
}

// Escape percent-encodes v for use inside the receiver's URI component.
//
// Every byte for which shouldEscape reports true is replaced by '%' followed by
// its two uppercase hexadecimal digits; every other byte is copied unchanged.
// When no byte needs escaping, v is returned as is.
//
// This is a re-implementation of the native url.escape; see the comment on
// shouldEscape for the reasons.
func (comp Component) Escape(v string) string {
	// First pass: count the bytes to escape so the output size is known.
	toEscape := 0
	for _, b := range []byte(v) {
		if shouldEscape(b, comp) {
			toEscape++
		}
	}
	if toEscape == 0 {
		return v
	}

	// An escaped byte takes three output bytes instead of one.
	size := len(v) + 2*toEscape

	// Short results are assembled in a fixed-size local array; longer ones get
	// a slice of exactly the required capacity.
	var scratch [64]byte
	out := scratch[:0]
	if size > len(scratch) {
		out = make([]byte, 0, size)
	}

	// Second pass: emit the encoded bytes.
	for _, b := range []byte(v) {
		if !shouldEscape(b, comp) {
			out = append(out, b)
			continue
		}
		out = append(out, '%', upperhex[b>>4], upperhex[b&15])
	}
	return string(out)
}

// Decode reverses the percent-encoding of v according to the receiver's
// component.
//
// Query values are decoded with url.QueryUnescape and path values with
// url.PathUnescape; their result and error are returned unchanged. Decoding is
// not implemented for the fragment and segment components, which return an
// error, as does any component that is not one of the four known ones.
func (comp Component) Decode(v string) (string, error) {
	if comp == QueryComponent {
		return url.QueryUnescape(v)
	}
	if comp == PathComponent {
		return url.PathUnescape(v)
	}
	if comp == FragmentComponent {
		return "", fmt.Errorf("fragment not implemented")
	}
	if comp == SegmentComponent {
		return "", fmt.Errorf("segment not implemented")
	}
	return "", fmt.Errorf("wrong component")
}

// URIEncoded implements the uri_encoded/3 predicate, called as
// uri_encoded(Component, Decoded, Encoded).
//
// Component must resolve to an atom accepted by NewComponent. Decoded and
// Encoded must each resolve to either a variable or an atom:
//   - if Encoded is a variable, it is unified with the escaped form of Decoded
//     (see Component.Escape);
//   - if Encoded is an atom, it is decoded with Component.Decode and the result
//     is unified with Decoded.
//
// Any other term type, an unknown component name or a decoding failure yields
// an error prefixed with "uri_encoded/3: ".
func URIEncoded(vm *engine.VM, component, decoded, encoded engine.Term, cont engine.Cont, env *engine.Env) *engine.Promise {
	return engine.Delay(func(ctx context.Context) *engine.Promise {
		var comp Component
		switch c := env.Resolve(component).(type) {
		case engine.Atom:
			cc, err := NewComponent(c.String())
			if err != nil {
				return engine.Error(fmt.Errorf("uri_encoded/3: %w", err))
			}
			comp = cc
		default:
			// Report the type of the resolved term, which is what was matched on.
			return engine.Error(fmt.Errorf("uri_encoded/3: invalid component type: %T, should be Atom", c))
		}

		// dec holds the escaped form of Decoded. It stays empty when Decoded is
		// unbound.
		// NOTE(review): when both Decoded and Encoded are unbound, Encoded ends
		// up unified with the term built from the empty string — confirm this
		// is intended rather than an instantiation error.
		var dec string
		switch d := env.Resolve(decoded).(type) {
		case engine.Variable:
		case engine.Atom:
			dec = comp.Escape(d.String())
		default:
			return engine.Error(fmt.Errorf("uri_encoded/3: invalid decoded type: %T, should be Variable or Atom", d))
		}

		switch e := env.Resolve(encoded).(type) {
		case engine.Variable:
			return engine.Unify(vm, encoded, util.StringToTerm(dec), cont, env)
		case engine.Atom:
			enc, err := comp.Decode(e.String())
			if err != nil {
				return engine.Error(fmt.Errorf("uri_encoded/3: %w", err))
			}
			return engine.Unify(vm, decoded, util.StringToTerm(enc), cont, env)
		default:
			return engine.Error(fmt.Errorf("uri_encoded/3: invalid encoded type: %T, should be Variable or Atom", e))
		}
	})
}
56 changes: 56 additions & 0 deletions x/logic/predicate/uri_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,62 @@ func TestURIEncoded(t *testing.T) {
wantSuccess: false,
wantError: fmt.Errorf("uri_encoded/3: invalid component name hey, expected `query`, `fragment`, `path` or `segment`"),
},
{
query: `uri_encoded(path, Decoded, foo).`,
wantSuccess: true,
wantResult: []types.TermResults{{
"Decoded": "foo",
}},
},
{
query: `uri_encoded(path, Decoded, 'foo%20bar').`,
wantSuccess: true,
wantResult: []types.TermResults{{
"Decoded": "'foo bar'",
}},
},
{
query: `uri_encoded(path, foo, Encoded).`,
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "foo",
}},
},
{
query: `uri_encoded(query, 'foo bar', Encoded).`,
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'foo%20bar'",
}},
},
{
query: "uri_encoded(query, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).",
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'%20!%22%23$%25%26\\'()*%2B,-./0123456789%3A%3B%3C%3D%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'",
}},
},
{
query: "uri_encoded(path, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).",
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'%20!%22%23$%25&\\'()*+,-./0123456789%3A;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'",
}},
},
{
query: "uri_encoded(segment, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).",
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'%20!%22%23$%25&\\'()*+,-.%2F0123456789%3A;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'",
}},
},
{
query: "uri_encoded(fragment, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).",
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'%20!%22%23$%25&\\'()*+,-./0123456789:;%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'",
}},
},
}
for nc, tc := range cases {
Convey(fmt.Sprintf("Given the query #%d: %s", nc, tc.query), func() {
Expand Down

0 comments on commit e02ca30

Please sign in to comment.