loader: Update to accept file:// URLs.

The file loader splits paths on the first colon character and uses the left-hand side as the prefix under which the document is rooted in data. On Windows this is problematic because of drive letters (e.g., C:\X\Y\Z is interpreted as "load the file at \X\Y\Z under data.C"). This change updates the loader to accept file:// URLs. This way callers can unambiguously specify filenames that contain colon characters. For now this will mainly be used by VS Code and other programmatic callers. In the future we can support other schemes (e.g., http). Fixes #1505 Signed-off-by: Torin Sandall <torinsandall@gmail.com>
open-policy-agent · Aug 22, 2019 · 3be55ed · 3be55ed
1 parent 614670e
commit 3be55ed
Show file tree

Hide file tree

Showing 4 changed files with 146 additions and 0 deletions.
diff --git a/cmd/eval.go b/cmd/eval.go
@@ -107,6 +107,10 @@ To evaluate a query against JSON data:
 
 	$ opa eval --data data.json 'data.names[_] = name'
 
+To evaluate a query against JSON data supplied with a file:// URL:
+
+	$ opa eval --data file:///path/to/file.json 'data'
+
 File Loading
 ------------
 

diff --git a/cmd/run.go b/cmd/run.go
@@ -86,6 +86,10 @@ Data file and directory paths can be prefixed with the desired destination in
 the data document with the following syntax:
 
 	<dotted-path>:<file-path>
+
+File paths can be specified as URLs to resolve ambiguity in paths containing colons:
+
+	$ opa run file:///c:/path/to/data.json
 `,
 		Run: func(cmd *cobra.Command, args []string) {
 

diff --git a/loader/loader.go b/loader/loader.go
@@ -9,8 +9,10 @@ import (
 	"bytes"
 	"fmt"
 	"io/ioutil"
+	"net/url"
 	"os"
 	"path/filepath"
+	"runtime"
 	"strings"
 
 	"github.com/ghodss/yaml"
@@ -118,6 +120,10 @@ func Filtered(paths []string, filter Filter) (*Result, error) {
 
 // Rego returns a RegoFile object loaded from the given path.
 func Rego(path string) (*RegoFile, error) {
+	path, err := cleanFileURL(path)
+	if err != nil {
+		return nil, err
+	}
 	bs, err := ioutil.ReadFile(path)
 	if err != nil {
 		return nil, err
@@ -134,6 +140,10 @@ func CleanPath(path string) string {
 // and path is a directory, then Paths will walk the directory structure
 // recursively and list files at each level.
 func Paths(path string, recurse bool) (paths []string, err error) {
+	path, err = cleanFileURL(path)
+	if err != nil {
+		return nil, err
+	}
 	err = filepath.Walk(path, func(f string, info os.FileInfo, err error) error {
 		if !recurse {
 			if path != f && path != filepath.Dir(f) {
@@ -149,6 +159,11 @@ func Paths(path string, recurse bool) (paths []string, err error) {
 // SplitPrefix returns a tuple specifying the document prefix and the file
 // path.
 func SplitPrefix(path string) ([]string, string) {
+	// Non-prefixed URLs can be returned without modification and their contents
+	// can be rooted directly under data.
+	if strings.Index(path, "://") == strings.Index(path, ":") {
+		return nil, path
+	}
 	parts := strings.SplitN(path, ":", 2)
 	if len(parts) == 2 && len(parts[0]) > 0 {
 		return strings.Split(parts[0], "."), parts[1]
@@ -227,6 +242,13 @@ func all(paths []string, filter Filter, f func(*Result, string, int) error) (*Re
 }
 
 func allRec(path string, filter Filter, errors *loaderErrors, loaded *Result, depth int, f func(*Result, string, int) error) {
+
+	path, err := cleanFileURL(path)
+	if err != nil {
+		errors.Add(err)
+		return
+	}
+
 	info, err := os.Stat(path)
 	if err != nil {
 		errors.Add(err)
@@ -261,6 +283,32 @@ func allRec(path string, filter Filter, errors *loaderErrors, loaded *Result, de
 	}
 }
 
+// cleanFileURL returns the filesystem path named by path. Inputs that do not
+// contain "://" are returned unchanged. Inputs that do are parsed as URLs and
+// must use the "file" scheme; a parse failure or any other scheme yields an
+// error. Only the path component of the parsed URL is returned.
+func cleanFileURL(path string) (string, error) {
+	if !strings.Contains(path, "://") {
+		return path, nil
+	}
+
+	// Named u rather than url so the net/url package is not shadowed.
+	u, err := url.Parse(path)
+	if err != nil {
+		return "", err
+	}
+	if u.Scheme != "file" {
+		return "", fmt.Errorf("unsupported URL scheme: %v", path)
+	}
+
+	// Trim leading slash on Windows if present. The url.Path field returned
+	// by url.Parse has leading slash that causes CreateFile() calls to fail
+	// on Windows. See https://github.com/golang/go/issues/6027 for details.
+	if runtime.GOOS == "windows" {
+		return strings.TrimPrefix(u.Path, "/"), nil
+	}
+	return u.Path, nil
+}
+
 func exclude(filters []Filter, path string, info os.FileInfo, depth int) bool {
 	for _, f := range filters {
 		if f(path, info, depth) {

diff --git a/loader/loader_test.go b/loader/loader_test.go
@@ -349,6 +349,96 @@ func TestLoadErrors(t *testing.T) {
 	})
 }
 
+// TestLoadFileURL passes file:// URLs (one of them carrying a document
+// prefix) to All and checks the documents that come back.
+func TestLoadFileURL(t *testing.T) {
+	fixtures := map[string]string{
+		"/a/a/1.json": `1`,        // this will load as a directory (e.g., file://a/a)
+		"b.json":      `{"b": 2}`, // this will load as a normal file
+		"c.json":      `3`,        // this will load as a rooted file
+	}
+
+	test.WithTempFS(fixtures, func(rootDir string) {
+		listed := mustListPaths(rootDir, false)[1:]
+		sort.Strings(listed)
+
+		urls := make([]string, 0, len(listed))
+		for _, p := range listed {
+			urls = append(urls, "file://"+p)
+		}
+
+		// Give the third sorted entry a "c:" document prefix.
+		urls[2] = "c:" + urls[2]
+
+		result, err := All(urls)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		want := parseJSON(`{"a": 1, "b": 2, "c": 3}`)
+		if got := result.Documents; !reflect.DeepEqual(want, got) {
+			t.Fatalf("Expected %v but got %v", want, got)
+		}
+	})
+}
+
+func TestUnsupportedURLScheme(t *testing.T) {
+	_, err := All([]string{"http://openpolicyagent.org"})
+	if err == nil || !strings.Contains(err.Error(), "unsupported URL scheme: http://openpolicyagent.org") {
+		t.Fatal(err)
+	}
+}
+
+// TestSplitPrefix is a table-driven test of SplitPrefix. Each case gives an
+// input string, the document prefix parts expected back (nil when the input
+// carries no prefix), and the expected remaining path.
+func TestSplitPrefix(t *testing.T) {
+
+	tests := []struct {
+		input     string
+		wantParts []string
+		wantPath  string
+	}{
+		{
+			// No colon at all: no prefix.
+			input:    "foo/bar",
+			wantPath: "foo/bar",
+		},
+		{
+			input:     "foo:/bar",
+			wantParts: []string{"foo"},
+			wantPath:  "/bar",
+		},
+		{
+			// Dotted prefixes are split into their components.
+			input:     "foo.bar:/baz",
+			wantParts: []string{"foo", "bar"},
+			wantPath:  "/baz",
+		},
+		{
+			// An un-prefixed file URL is returned unchanged.
+			input:    "file:///a/b/c",
+			wantPath: "file:///a/b/c",
+		},
+		{
+			input:     "x.y:file:///a/b/c",
+			wantParts: []string{"x", "y"},
+			wantPath:  "file:///a/b/c",
+		},
+		{
+			// A colon inside the URL path (drive letter) is not a prefix separator.
+			input:    "file:///c:/a/b/c",
+			wantPath: "file:///c:/a/b/c",
+		},
+		{
+			input:     "x.y:file:///c:/a/b/c",
+			wantParts: []string{"x", "y"},
+			wantPath:  "file:///c:/a/b/c",
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.input, func(t *testing.T) {
+			parts, path := SplitPrefix(tc.input)
+			if !reflect.DeepEqual(parts, tc.wantParts) {
+				t.Errorf("wanted parts %v but got %v", tc.wantParts, parts)
+			}
+			if path != tc.wantPath {
+				// Expected value first, actual second, matching the message.
+				t.Errorf("wanted path %q but got %q", tc.wantPath, path)
+			}
+		})
+	}
+}
+
 func TestLoadRegos(t *testing.T) {
 	files := map[string]string{
 		"/x.rego": `