-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add module source address parsing (#7)
* add module source addr file. This enables parsing of module source addresses for registry modules. * add module source example to readme * remove ParseRawModuleSource and local source parsing
- Loading branch information
Showing
4 changed files
with
594 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,241 @@ | ||
package tfaddr | ||
|
||
import ( | ||
"fmt" | ||
"path" | ||
"regexp" | ||
"strings" | ||
|
||
svchost "github.com/hashicorp/terraform-svchost" | ||
) | ||
|
||
// ModuleSourceRegistry is representing a module listed in a Terraform module | ||
// registry. | ||
type ModuleSourceRegistry struct { | ||
// PackageAddr is the registry package that the target module belongs to. | ||
// The module installer must translate this into a ModuleSourceRemote | ||
// using the registry API and then take that underlying address's | ||
// PackageAddr in order to find the actual package location. | ||
PackageAddr ModuleRegistryPackage | ||
|
||
// If Subdir is non-empty then it represents a sub-directory within the | ||
// remote package that the registry address eventually resolves to. | ||
// This will ultimately become the suffix of the Subdir of the | ||
// ModuleSourceRemote that the registry address translates to. | ||
// | ||
// Subdir uses a normalized forward-slash-based path syntax within the | ||
// virtual filesystem represented by the final package. It will never | ||
// include `../` or `./` sequences. | ||
Subdir string | ||
} | ||
|
||
// DefaultModuleRegistryHost is the hostname used for registry-based module | ||
// source addresses that do not have an explicit hostname. | ||
const DefaultModuleRegistryHost = svchost.Hostname("registry.terraform.io") | ||
|
||
var (
	// moduleRegistryNamePattern matches a valid "namespace" or "name"
	// component: one to 64 ASCII letters, digits, dashes, or underscores,
	// where the first and last characters must be a letter or digit.
	moduleRegistryNamePattern = regexp.MustCompile("^[0-9A-Za-z](?:[0-9A-Za-z-_]{0,62}[0-9A-Za-z])?$")

	// moduleRegistryTargetSystemPattern matches a valid "target system"
	// component: one to 64 lowercase ASCII letters or digits.
	moduleRegistryTargetSystemPattern = regexp.MustCompile("^[0-9a-z]{1,64}$")
)
|
||
// ParseRawModuleSourceRegistry only accepts module registry addresses, and | ||
// will reject any other address type. | ||
func ParseRawModuleSourceRegistry(raw string) (ModuleSourceRegistry, error) { | ||
var err error | ||
|
||
var subDir string | ||
raw, subDir = splitPackageSubdir(raw) | ||
if strings.HasPrefix(subDir, "../") { | ||
return ModuleSourceRegistry{}, fmt.Errorf("subdirectory path %q leads outside of the module package", subDir) | ||
} | ||
|
||
parts := strings.Split(raw, "/") | ||
// A valid registry address has either three or four parts, because the | ||
// leading hostname part is optional. | ||
if len(parts) != 3 && len(parts) != 4 { | ||
return ModuleSourceRegistry{}, fmt.Errorf("a module registry source address must have either three or four slash-separated components") | ||
} | ||
|
||
host := DefaultModuleRegistryHost | ||
if len(parts) == 4 { | ||
host, err = svchost.ForComparison(parts[0]) | ||
if err != nil { | ||
// The svchost library doesn't produce very good error messages to | ||
// return to an end-user, so we'll use some custom ones here. | ||
switch { | ||
case strings.Contains(parts[0], "--"): | ||
// Looks like possibly punycode, which we don't allow here | ||
// to ensure that source addresses are written readably. | ||
return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname %q; internationalized domain names must be given as direct unicode characters, not in punycode", parts[0]) | ||
default: | ||
return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname %q", parts[0]) | ||
} | ||
} | ||
if !strings.Contains(host.String(), ".") { | ||
return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname: must contain at least one dot") | ||
} | ||
// Discard the hostname prefix now that we've processed it | ||
parts = parts[1:] | ||
} | ||
|
||
ret := ModuleSourceRegistry{ | ||
PackageAddr: ModuleRegistryPackage{ | ||
Host: host, | ||
}, | ||
|
||
Subdir: subDir, | ||
} | ||
|
||
if host == svchost.Hostname("github.com") || host == svchost.Hostname("bitbucket.org") { | ||
return ret, fmt.Errorf("can't use %q as a module registry host, because it's reserved for installing directly from version control repositories", host) | ||
} | ||
|
||
if ret.PackageAddr.Namespace, err = parseModuleRegistryName(parts[0]); err != nil { | ||
if strings.Contains(parts[0], ".") { | ||
// Seems like the user omitted one of the latter components in | ||
// an address with an explicit hostname. | ||
return ret, fmt.Errorf("source address must have three more components after the hostname: the namespace, the name, and the target system") | ||
} | ||
return ret, fmt.Errorf("invalid namespace %q: %s", parts[0], err) | ||
} | ||
if ret.PackageAddr.Name, err = parseModuleRegistryName(parts[1]); err != nil { | ||
return ret, fmt.Errorf("invalid module name %q: %s", parts[1], err) | ||
} | ||
if ret.PackageAddr.TargetSystem, err = parseModuleRegistryTargetSystem(parts[2]); err != nil { | ||
if strings.Contains(parts[2], "?") { | ||
// The user was trying to include a query string, probably? | ||
return ret, fmt.Errorf("module registry addresses may not include a query string portion") | ||
} | ||
return ret, fmt.Errorf("invalid target system %q: %s", parts[2], err) | ||
} | ||
|
||
return ret, nil | ||
} | ||
|
||
// parseModuleRegistryName validates and normalizes a string in either the | ||
// "namespace" or "name" position of a module registry source address. | ||
func parseModuleRegistryName(given string) (string, error) { | ||
// Similar to the names in provider source addresses, we defined these | ||
// to be compatible with what filesystems and typical remote systems | ||
// like GitHub allow in names. Unfortunately we didn't end up defining | ||
// these exactly equivalently: provider names can only use dashes as | ||
// punctuation, whereas module names can use underscores. So here we're | ||
// using some regular expressions from the original module source | ||
// implementation, rather than using the IDNA rules as we do in | ||
// ParseProviderPart. | ||
|
||
if !moduleRegistryNamePattern.MatchString(given) { | ||
return "", fmt.Errorf("must be between one and 64 characters, including ASCII letters, digits, dashes, and underscores, where dashes and underscores may not be the prefix or suffix") | ||
} | ||
|
||
// We also skip normalizing the name to lowercase, because we historically | ||
// didn't do that and so existing module registries might be doing | ||
// case-sensitive matching. | ||
return given, nil | ||
} | ||
|
||
// parseModuleRegistryTargetSystem validates and normalizes a string in the | ||
// "target system" position of a module registry source address. This is | ||
// what we historically called "provider" but never actually enforced as | ||
// being a provider address, and now _cannot_ be a provider address because | ||
// provider addresses have three slash-separated components of their own. | ||
func parseModuleRegistryTargetSystem(given string) (string, error) { | ||
// Similar to the names in provider source addresses, we defined these | ||
// to be compatible with what filesystems and typical remote systems | ||
// like GitHub allow in names. Unfortunately we didn't end up defining | ||
// these exactly equivalently: provider names can't use dashes or | ||
// underscores. So here we're using some regular expressions from the | ||
// original module source implementation, rather than using the IDNA rules | ||
// as we do in ParseProviderPart. | ||
|
||
if !moduleRegistryTargetSystemPattern.MatchString(given) { | ||
return "", fmt.Errorf("must be between one and 64 ASCII letters or digits") | ||
} | ||
|
||
// We also skip normalizing the name to lowercase, because we historically | ||
// didn't do that and so existing module registries might be doing | ||
// case-sensitive matching. | ||
return given, nil | ||
} | ||
|
||
// String returns a full representation of the address, including any | ||
// additional components that are typically implied by omission in | ||
// user-written addresses. | ||
// | ||
// We typically use this longer representation in error message, in case | ||
// the inclusion of normally-omitted components is helpful in debugging | ||
// unexpected behavior. | ||
func (s ModuleSourceRegistry) String() string { | ||
if s.Subdir != "" { | ||
return s.PackageAddr.String() + "//" + s.Subdir | ||
} | ||
return s.PackageAddr.String() | ||
} | ||
|
||
// ForDisplay is similar to String but instead returns a representation of | ||
// the idiomatic way to write the address in configuration, omitting | ||
// components that are commonly just implied in addresses written by | ||
// users. | ||
// | ||
// We typically use this shorter representation in informational messages, | ||
// such as the note that we're about to start downloading a package. | ||
func (s ModuleSourceRegistry) ForDisplay() string { | ||
if s.Subdir != "" { | ||
return s.PackageAddr.ForDisplay() + "//" + s.Subdir | ||
} | ||
return s.PackageAddr.ForDisplay() | ||
} | ||
|
||
// splitPackageSubdir detects whether the given address string has a | ||
// subdirectory portion, and if so returns a non-empty subDir string | ||
// along with the trimmed package address. | ||
// | ||
// If the given string doesn't have a subdirectory portion then it'll | ||
// just be returned verbatim in packageAddr, with an empty subDir value. | ||
func splitPackageSubdir(given string) (packageAddr, subDir string) { | ||
packageAddr, subDir = sourceDirSubdir(given) | ||
if subDir != "" { | ||
subDir = path.Clean(subDir) | ||
} | ||
return packageAddr, subDir | ||
} | ||
|
||
// sourceDirSubdir splits a source URL at its first "//" subdirectory
// separator, returning the URL without the subdirectory followed by the
// subdirectory itself. Any query string is kept on the returned URL.
//
// ex:
//
//	dom.com/path/?q=p               => dom.com/path/?q=p, ""
//	proto://dom.com/path//*?q=p     => proto://dom.com/path?q=p, "*"
//	proto://dom.com/path//path2?q=p => proto://dom.com/path?q=p, "path2"
func sourceDirSubdir(src string) (string, string) {
	// Everything from the first "?" onward is the query string. It might
	// itself contain another URL, so it is never searched for separators.
	queryStart := strings.IndexByte(src, '?')
	if queryStart < 0 {
		queryStart = len(src)
	}
	addr, query := src[:queryStart], src[queryStart:]

	// Begin searching after any "://" so that the scheme separator is not
	// mistaken for the subdirectory separator.
	searchFrom := 0
	if schemeEnd := strings.Index(addr, "://"); schemeEnd >= 0 {
		searchFrom = schemeEnd + len("://")
	}

	sep := strings.Index(addr[searchFrom:], "//")
	if sep < 0 {
		// No explicit subdirectory: hand the input back untouched.
		return src, ""
	}
	sep += searchFrom

	// Re-attach the query string to the address portion that precedes the
	// separator; what follows the separator is the subdirectory.
	return addr[:sep] + query, addr[sep+2:]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
package tfaddr | ||
|
||
import ( | ||
"strings" | ||
|
||
svchost "github.com/hashicorp/terraform-svchost" | ||
) | ||
|
||
// A ModulePackage represents a physical location where Terraform can retrieve
// a module package: an archive, repository, or other similar container which
// delivers the source code for one or more Terraform modules.
//
// A ModulePackage is a string in go-getter's address syntax. By convention,
// ModulePackage-typed values are used only for the result of successfully
// running the go-getter "detectors", which produce an address string that
// includes an explicit installation method prefix along with an address in
// the format expected by that installation method.
//
// Although the "detector" phase of go-getter does some simple normalization
// in certain cases, it isn't generally possible to compare two ModulePackage
// values to decide whether they refer to the same package. Two equal
// ModulePackage values represent the same package, but other non-equal
// values might also refer to that package, and there is no reliable way to
// determine that.
//
// Don't convert a user-provided string directly to ModulePackage. Instead,
// use ParseModuleSource with a remote module address and then access the
// ModulePackage value from the result, making sure to also handle the
// selected subdirectory if any. Convert directly to ModulePackage only for
// a string that is hard-coded into the program (e.g. in a unit test) where
// you've ensured that it's already in the expected syntax.
//
// NOTE(review): ParseModuleSource is not defined in the portion of this
// package visible here - confirm that it is still the right entry point.
type ModulePackage string

// String returns the package address as a plain string.
func (p ModulePackage) String() string {
	return string(p)
}
|
||
// A ModuleRegistryPackage is an extra indirection over a ModulePackage where | ||
// we use a module registry to translate a more symbolic address (and | ||
// associated version constraint given out of band) into a physical source | ||
// location. | ||
// | ||
// ModuleRegistryPackage is distinct from ModulePackage because they have | ||
// disjoint use-cases: registry package addresses are only used to query a | ||
// registry in order to find a real module package address. These being | ||
// distinct is intended to help future maintainers more easily follow the | ||
// series of steps in the module installer, with the help of the type checker. | ||
type ModuleRegistryPackage struct { | ||
Host svchost.Hostname | ||
Namespace string | ||
Name string | ||
TargetSystem string | ||
} | ||
|
||
func (s ModuleRegistryPackage) String() string { | ||
// Note: we're using the "display" form of the hostname here because | ||
// for our service hostnames "for display" means something different: | ||
// it means to render non-ASCII characters directly as Unicode | ||
// characters, rather than using the "punycode" representation we | ||
// use for internal processing, and so the "display" representation | ||
// is actually what users would write in their configurations. | ||
return s.Host.ForDisplay() + "/" + s.ForRegistryProtocol() | ||
} | ||
|
||
func (s ModuleRegistryPackage) ForDisplay() string { | ||
if s.Host == DefaultModuleRegistryHost { | ||
return s.ForRegistryProtocol() | ||
} | ||
return s.Host.ForDisplay() + "/" + s.ForRegistryProtocol() | ||
} | ||
|
||
// ForRegistryProtocol returns a string representation of just the namespace, | ||
// name, and target system portions of the address, always omitting the | ||
// registry hostname and the subdirectory portion, if any. | ||
// | ||
// This is primarily intended for generating addresses to send to the | ||
// registry in question via the registry protocol, since the protocol | ||
// skips sending the registry its own hostname as part of identifiers. | ||
func (s ModuleRegistryPackage) ForRegistryProtocol() string { | ||
var buf strings.Builder | ||
buf.WriteString(s.Namespace) | ||
buf.WriteByte('/') | ||
buf.WriteString(s.Name) | ||
buf.WriteByte('/') | ||
buf.WriteString(s.TargetSystem) | ||
return buf.String() | ||
} |
Oops, something went wrong.