Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement locRootPath #4909

Merged
merged 3 commits into from
Dec 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion api/internal/localizer/localizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,10 @@ func (lc *localizer) localizeDir(path string) (string, error) {
if repo := ldr.Repo(); repo != "" {
// TODO(annasong): You need to check if you can add a localize directory here to store
// the remote file content. There may be a directory that shares the localize directory name.
locPath = locRootPath(path, repo, root)
locPath, err = locRootPath(path, repo, root, lc.fSys)
if err != nil {
return "", err
}
} else {
locPath, err = filepath.Rel(lc.root.String(), root.String())
if err != nil {
Expand Down
76 changes: 67 additions & 9 deletions api/internal/localizer/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ const (

// LocalizeDir is the name of the localize directories used to store remote content in the localize destination
LocalizeDir = "localized-files"

// FileSchemeDir is the name of the directory immediately inside LocalizeDir used to store file-schemed repos
FileSchemeDir = "file-schemed"
)

// establishScope returns the effective scope given localize arguments and targetLdr at rawTarget. For remote rawTarget,
Expand Down Expand Up @@ -132,25 +135,80 @@ func locFilePath(fileURL string) string {
// File urls must have http or https scheme, so it is safe to use url.Parse.
u, err := url.Parse(fileURL)
if err != nil {
log.Fatalf("cannot parse validated file url %q: %s", fileURL, err.Error())
log.Panicf("cannot parse validated file url %q: %s", fileURL, err)
}

// Percent-encodings should be preserved in case sub-delims have special meaning.
// HTTP requests use the escaped path, so we use it here. Escaped paths also help us
// preserve percent-encoding in the original path, in the absence of illegal characters,
// in case they have special meaning to the host.
// Extraneous '..' parent directory dot-segments should be removed.
path := filepath.Join(string(filepath.Separator), filepath.FromSlash(u.EscapedPath()))

// The host should not include userinfo or port.
// We intentionally exclude userinfo and port.
// Raw github urls are the only type of file urls kustomize officially accepts.
// In this case, the path already consists of org, repo, version, and path in repo, in order,
// so we can use it as is.
return filepath.Join(LocalizeDir, u.Hostname(), path)
}

// locRootPath returns the relative localized path of the validated root url rootURL, where the local copy of its repo
// is at repoDir and the copy of its root is at rootDir
// TODO(annasong): implement
func locRootPath(rootURL string, repoDir string, rootDir filesys.ConfirmedDir) string {
_ = rootURL
_, _ = repoDir, rootDir
return ""
// is at repoDir and the copy of its root is at root on fSys.
func locRootPath(rootURL, repoDir string, root filesys.ConfirmedDir, fSys filesys.FileSystem) (string, error) {
repoSpec, err := git.NewRepoSpecFromURL(rootURL)
if err != nil {
log.Panicf("cannot parse validated repo url %q: %s", rootURL, err)
}
host, err := parseHost(repoSpec)
if err != nil {
return "", errors.WrapPrefixf(err, "unable to parse host of remote root %q", rootURL)
}
repo, err := filesys.ConfirmDir(fSys, repoDir)
if err != nil {
log.Panicf("unable to establish validated repo download location %q: %s", repoDir, err)
}
// calculate from copy instead of url to straighten symlinks
inRepo, err := filepath.Rel(repo.String(), root.String())
if err != nil {
log.Panicf("cannot find path from %q to child directory %q: %s", repo, root, err)
}
// We do not need to escape OrgRepo, a path on the git server.
// However, like git, we clean dot-segments from OrgRepo.
// Git does not allow ref value to contain dot-segments.
return filepath.Join(LocalizeDir,
host,
filepath.Join(string(filepath.Separator), filepath.FromSlash(repoSpec.OrgRepo)),
filepath.FromSlash(repoSpec.Ref),
inRepo), nil
}

// parseHost returns the localize directory path corresponding to repoSpec.Host
func parseHost(repoSpec *git.RepoSpec) (string, error) {
var target string
switch scheme, _, _ := strings.Cut(repoSpec.Host, "://"); scheme {
case "gh:":
// 'gh' was meant to be a local github.com shorthand, in which case
// the .gitconfig file could map it to any host. See origin here:
// https://github.com/kubernetes-sigs/kustomize/blob/kustomize/v4.5.7/api/internal/git/repospec.go#L203
// We give it a special host directory here under the assumption
// that we are unlikely to have another host simply named 'gh'.
return "gh", nil
case "file":
// We put file-scheme repos under a special directory to avoid
// colluding local absolute paths with hosts.
return FileSchemeDir, nil
case "https", "http", "ssh":
target = repoSpec.Host
default:
// We must have relative ssh url; in other words, the url has scp-like syntax.
// We attach a scheme to avoid url.Parse errors.
target = "ssh://" + repoSpec.Host
Copy link
Contributor

@natasha41575 natasha41575 Dec 16, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not that familiar with URL parsing, so I have some questions that may help me understand the goals of this code:

  1. Does this ssh you're adding show up in the output of localize?
  2. Is this ssh a placeholder just to avoid url.Parse errors, or are we intentionally adding the ssh because we expect by default schemes to be ssh://? If it's the first, I wonder if there is some other placeholder we can use that isn't actually a real scheme to make it extra clear.
  3. What are some cases that would hit the default case? Seems like we would hit it for both empty schemes and for other schemes that we don't know about. For the latter (other schemes we don't know about), it seems weird to prepend ssh://. Was that your intention?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. No, we strip the scheme when we call url.Hostname: https://github.com/kubernetes-sigs/kustomize/pull/4909/files/b235147079b094ea1dd8c4ebbb1863c2a2c7dfe6#diff-f52a1bf07c678854203647b33ccaa09d945af03c9233b3dec3a947ba27d531deR212
  2. Yes, it's just to avoid url.Parse errors. However, the correct scheme to add here is technically ssh. See my response to 3.
  3. In this function, git.NewRepoSpecFromURL must have been called successfully. Given this, the only url that hits the default case is the relative ssh case that begins with git@. Relative ssh urls have a different delimiter than full ssh urls (ones with ssh:// header), but we remove this distinction here: https://github.com/kubernetes-sigs/kustomize/pull/4909/files/b235147079b094ea1dd8c4ebbb1863c2a2c7dfe6#diff-f52a1bf07c678854203647b33ccaa09d945af03c9233b3dec3a947ba27d531deR206. Thus, we can safely add the ssh:// scheme header to urls under the default case.

}
// url.Parse will not recognize ':' delimiter that both RepoSpec and git accept.
target = strings.TrimSuffix(target, ":")
u, err := url.Parse(target)
if err != nil {
return "", errors.Wrap(err)
}
// strip scheme, userinfo, port, and any trailing slashes.
return u.Hostname(), nil
}
251 changes: 246 additions & 5 deletions api/internal/localizer/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,59 @@
package localizer //nolint:testpackage

import (
"fmt"
"os"
"path/filepath"
"strings"
"testing"

"github.com/stretchr/testify/require"
"sigs.k8s.io/kustomize/api/ifc"
"sigs.k8s.io/kustomize/api/internal/git"
"sigs.k8s.io/kustomize/kyaml/filesys"
)

func TestDefaultNewDirRepo(t *testing.T) {
for name, test := range map[string]struct {
url, dst string
}{
"simple": {
url: "https://github.com/org/repo?ref=value",
dst: "localized-repo-value",
},
"slashed_ref": {
url: "https://github.com/org/repo?ref=group/version",
dst: "localized-repo-group-version",
},
} {
t.Run(name, func(t *testing.T) {
repoSpec, err := git.NewRepoSpecFromURL(test.url)
require.NoError(t, err)
require.Equal(t, test.dst, defaultNewDir(&fakeLoader{t.TempDir()}, repoSpec))
})
}
}

type fakeLoader struct {
root string
}

func (fl *fakeLoader) Root() string {
return fl.root
}
func (fl *fakeLoader) Repo() string {
return fl.root
}
func (fl *fakeLoader) Load(_ string) ([]byte, error) {
return []byte{}, nil
}
func (fl *fakeLoader) New(path string) (ifc.Loader, error) {
return &fakeLoader{path}, nil
}
func (fl *fakeLoader) Cleanup() error {
return nil
}

func TestUrlBase(t *testing.T) {
require.Equal(t, "repo", urlBase("https://github.com/org/repo"))
}
Expand All @@ -34,6 +80,14 @@ func TestLocFilePath(t *testing.T) {
url: "https://raw.githubusercontent.com/org/repo/ref/path/to/file.yaml",
path: simpleJoin(t, "raw.githubusercontent.com", "org", "repo", "ref", "path", "to", "file.yaml"),
},
"http-scheme": {
url: "http://host/path",
path: simpleJoin(t, "host", "path"),
},
"extraneous_components": {
url: "http://userinfo@host:1234/path/file?query",
path: simpleJoin(t, "host", "path", "file"),
},
"empty_path": {
url: "https://host",
path: "host",
Expand All @@ -42,11 +96,7 @@ func TestLocFilePath(t *testing.T) {
url: "https://host//",
path: "host",
},
"extraneous_components": {
url: "http://userinfo@host:1234/path/file?query",
path: simpleJoin(t, "host", "path", "file"),
},
"percent-encoding": {
"percent-encoded_path": {
url: "https://host/file%2Eyaml",
path: simpleJoin(t, "host", "file%2Eyaml"),
},
Expand All @@ -64,3 +114,194 @@ func TestLocFilePath(t *testing.T) {
})
}
}

func TestLocFilePathColon(t *testing.T) {
req := require.New(t)

// The colon is special because it was once used as the unix file separator.
const url = "https://[2001:4860:4860::8888]/file.yaml"
const host = "2001:4860:4860::8888"
const file = "file.yaml"
req.Equal(simpleJoin(t, LocalizeDir, host, file), locFilePath(url))

fSys := filesys.MakeFsOnDisk()
targetDir := simpleJoin(t, t.TempDir(), host)

// We check that we can create single directory, meaning ':' not used as file separator.
req.NoError(fSys.Mkdir(targetDir))
_, err := fSys.Create(simpleJoin(t, targetDir, file))
req.NoError(err)

// We check that the directory with such name is readable.
files, err := fSys.ReadDir(targetDir)
req.NoError(err)
req.Equal([]string{file}, files)
}

func TestLocFilePath_SpecialChar(t *testing.T) {
req := require.New(t)

// The wild card character is one of the legal uri characters with more meaning
// to the system, so we test it here.
const wildcard = "*"
req.Equal(simpleJoin(t, LocalizeDir, "host", wildcard), locFilePath("https://host/*"))

fSys := filesys.MakeFsOnDisk()
testDir := t.TempDir()
req.NoError(fSys.Mkdir(simpleJoin(t, testDir, "a")))
req.NoError(fSys.WriteFile(simpleJoin(t, testDir, "b"), []byte{}))

// We check that we can create and read from wild card-named file.
// We check that the file system is not matching it to existing file names.
req.NoError(fSys.WriteFile(simpleJoin(t, testDir, wildcard), []byte("test")))
content, err := fSys.ReadFile(simpleJoin(t, testDir, wildcard))
req.NoError(err)
req.Equal("test", string(content))
}

func TestLocFilePath_SpecialFiles(t *testing.T) {
for name, tFSys := range map[string]struct {
urlPath string
pathDir, pathFile string
}{
"windows_reserved_name": {
urlPath: "/aux/file",
pathDir: "aux",
pathFile: "file",
},
"hidden_files": {
urlPath: "/.../.file",
pathDir: "...",
pathFile: ".file",
},
} {
t.Run(name, func(t *testing.T) {
req := require.New(t)

expectedPath := simpleJoin(t, LocalizeDir, "host", tFSys.pathDir, tFSys.pathFile)
req.Equal(expectedPath, locFilePath("https://host"+tFSys.urlPath))

fSys := filesys.MakeFsOnDisk()
targetDir := simpleJoin(t, t.TempDir(), tFSys.pathDir)
req.NoError(fSys.Mkdir(targetDir))
req.NoError(fSys.WriteFile(simpleJoin(t, targetDir, tFSys.pathFile), []byte("test")))

content, err := fSys.ReadFile(simpleJoin(t, targetDir, tFSys.pathFile))
req.NoError(err)
req.Equal([]byte("test"), content)
})
}
}

func makeConfirmedDir(t *testing.T) (filesys.FileSystem, filesys.ConfirmedDir) {
t.Helper()

fSys := filesys.MakeFsOnDisk()
testDir, err := filesys.NewTmpConfirmedDir()
require.NoError(t, err)
t.Cleanup(func() {
_ = fSys.RemoveAll(testDir.String())
})

return fSys, testDir
}

func TestLocRootPath_URLComponents(t *testing.T) {
for name, test := range map[string]struct {
urlf, path string
}{
"ssh": {
urlf: "ssh://git@github.com/org/repo//%s?ref=value",
path: simpleJoin(t, "github.com", "org", "repo", "value"),
},
"rel_ssh": {
urlf: "git@github.com:org/repo//%s?ref=value",
path: simpleJoin(t, "github.com", "org", "repo", "value"),
},
"https": {
urlf: "https://gitlab.com/org/repo//%s?ref=value",
path: simpleJoin(t, "gitlab.com", "org", "repo", "value"),
},
"file": {
urlf: "file:///var/run/repo//%s?ref=value",
path: simpleJoin(t, FileSchemeDir, "var", "run", "repo", "value"),
},
"gh_shorthand": {
urlf: "gh:org/repo//%s?ref=value",
path: simpleJoin(t, "gh", "org", "repo", "value"),
},
"IPv6": {
urlf: "https://[2001:4860:4860::8888]/org/repo//%s?ref=value",
path: simpleJoin(t, "2001:4860:4860::8888", "org", "repo", "value"),
},
"port": {
urlf: "https://localhost.com:8080/org/repo//%s?ref=value",
path: simpleJoin(t, "localhost.com", "org", "repo", "value"),
},
"no_org": {
urlf: "https://github.com/repo//%s?ref=value",
path: simpleJoin(t, "github.com", "repo", "value"),
},
".git_suffix": {
urlf: "https://github.com/org1/org2/repo.git//%s?ref=value",
path: simpleJoin(t, "github.com", "org1", "org2", "repo", "value"),
},
"dot-segments": {
urlf: "https://github.com/./../org/../org/repo.git//%s?ref=value",
path: simpleJoin(t, "github.com", "org", "repo", "value"),
},
"no_path_delimiter": {
urlf: "https://github.com/org/repo/%s?ref=value",
path: simpleJoin(t, "github.com", "org", "repo", "value"),
},
"illegal_windows_dir": {
urlf: "https://gitlab.com/org./repo..git//%s?ref=value",
path: simpleJoin(t, "gitlab.com", "org.", "repo.", "value"),
},
"ref_has_slash": {
urlf: "https://gitlab.com/org/repo//%s?ref=group/version/kind",
path: simpleJoin(t, "gitlab.com", "org", "repo", "group", "version", "kind"),
},
} {
t.Run(name, func(t *testing.T) {
u := fmt.Sprintf(test.urlf, "path/to/root")
path := simpleJoin(t, LocalizeDir, test.path, "path", "to", "root")

fSys, testDir := makeConfirmedDir(t)
repoDir := simpleJoin(t, testDir.String(), "repo_random-hash")
require.NoError(t, fSys.Mkdir(repoDir))
rootDir := simpleJoin(t, repoDir, "path", "to", "root")
require.NoError(t, fSys.MkdirAll(rootDir))

actual, err := locRootPath(u, repoDir, filesys.ConfirmedDir(rootDir), fSys)
require.NoError(t, err)
require.Equal(t, path, actual)

require.NoError(t, fSys.MkdirAll(simpleJoin(t, testDir.String(), path)))
})
}
}

func TestLocRootPath_Repo(t *testing.T) {
const url = "https://github.com/org/repo?ref=value"
expected := simpleJoin(t, LocalizeDir, "github.com", "org", "repo", "value")

fSys, testDir := makeConfirmedDir(t)
actual, err := locRootPath(url, testDir.String(), testDir, fSys)
require.NoError(t, err)
require.Equal(t, expected, actual)
}

func TestLocRootPath_SymlinkPath(t *testing.T) {
const url = "https://github.com/org/repo//symlink?ref=value"

fSys, repoDir := makeConfirmedDir(t)
rootDir := simpleJoin(t, repoDir.String(), "actual-root")
require.NoError(t, fSys.Mkdir(rootDir))
require.NoError(t, os.Symlink(rootDir, simpleJoin(t, repoDir.String(), "symlink")))

expected := simpleJoin(t, LocalizeDir, "github.com", "org", "repo", "value", "actual-root")
actual, err := locRootPath(url, repoDir.String(), filesys.ConfirmedDir(rootDir), fSys)
require.NoError(t, err)
require.Equal(t, expected, actual)
}