diff --git a/converter.py b/converter.py new file mode 100755 index 00000000000..e9a6cb22141 --- /dev/null +++ b/converter.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python +import json +import yaml +import sys +import argparse +from datetime import datetime, timezone +import dbt.clients.registry as registry + + +def yaml_type(fname): + with open(fname) as f: + return yaml.load(f) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--project", type=yaml_type, default="dbt_project.yml") + parser.add_argument("--namespace", required=True) + return parser.parse_args() + + +def get_full_name(args): + return "{}/{}".format(args.namespace, args.project["name"]) + + +def init_project_in_packages(args, packages): + full_name = get_full_name(args) + if full_name not in packages: + packages[full_name] = { + "name": args.project["name"], + "namespace": args.namespace, + "latest": args.project["version"], + "assets": {}, + "versions": {}, + } + return packages[full_name] + + +def add_version_to_package(args, project_json): + project_json["versions"][args.project["version"]] = { + "id": "{}/{}".format(get_full_name(args), args.project["version"]), + "name": args.project["name"], + "version": args.project["version"], + "description": "", + "published_at": datetime.now(timezone.utc).astimezone().isoformat(), + "packages": args.project.get("packages") or [], + "works_with": [], + "_source": { + "type": "github", + "url": "", + "readme": "", + }, + "downloads": { + "tarball": "", + "format": "tgz", + "sha1": "", + }, + } + + +def main(): + args = parse_args() + packages = registry.packages() + project_json = init_project_in_packages(args, packages) + if args.project["version"] in project_json["versions"]: + raise Exception("Version {} already in packages JSON" + .format(args.project["version"]), + file=sys.stderr) + add_version_to_package(args, project_json) + print(json.dumps(packages, indent=2)) + +if __name__ == "__main__": + main() diff --git a/dbt/adapters/bigquery.py b/dbt/adapters/bigquery.py index 9d2e6eb7b97..1eb9ef9c0c6 100644 --- a/dbt/adapters/bigquery.py +++ b/dbt/adapters/bigquery.py @@ -372,6 +372,12 @@ def get_columns_in_table(cls, profile, schema_name, table_name, raise dbt.exceptions.NotImplementedException( '`get_columns_in_table` is not implemented for this adapter!') + @classmethod + def get_columns_in_table(cls, profile, schema_name, table_name, + model_name=None): + raise dbt.exceptions.NotImplementedException( + '`get_columns_in_table` is not implemented for this adapter!') + @classmethod def check_schema_exists(cls, profile, schema, model_name=None): conn = cls.get_connection(profile, model_name) diff --git a/dbt/clients/git.py b/dbt/clients/git.py index 9183acef4f3..ab94603d03f 100644 --- a/dbt/clients/git.py +++ b/dbt/clients/git.py @@ -1,3 +1,4 @@ +import re import os.path from dbt.clients.system import run_cmd, rmdir @@ -29,7 +30,7 @@ def checkout(cwd, repo, branch=None): if branch is None: branch = 'master' - logger.info(' Checking out branch {}.'.format(branch)) + logger.debug(' Checking out branch {}.'.format(branch)) run_cmd(cwd, ['git', 'remote', 'set-branches', 'origin', branch]) run_cmd(cwd, ['git', 'fetch', '--tags', '--depth', '1', 'origin', branch]) @@ -59,3 +60,32 @@ def get_current_sha(cwd): def remove_remote(cwd): return run_cmd(cwd, ['git', 'remote', 'rm', 'origin']) + + +def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False, + branch=None): + _, err = clone(repo, cwd, dirname=dirname, remove_git_dir=remove_git_dir) + exists = re.match("fatal: destination path '(.+)' already exists", + err.decode('utf-8')) + directory = None + start_sha = None + if exists: + directory = exists.group(1) + logger.debug('Updating existing dependency %s.', directory) + else: + matches = re.match("Cloning into '(.+)'", err.decode('utf-8')) + directory = matches.group(1) + logger.debug('Pulling new dependency %s.', directory) + full_path = os.path.join(cwd, directory) + start_sha = get_current_sha(full_path) + checkout(full_path, repo, branch) + end_sha = get_current_sha(full_path) + if exists: + if start_sha == end_sha: + logger.debug(' Already at %s, nothing to do.', start_sha[:7]) + else: + logger.debug(' Updated checkout from %s to %s.', + start_sha[:7], end_sha[:7]) + else: + logger.debug(' Checked out at %s.', end_sha[:7]) + return directory diff --git a/dbt/clients/registry.py b/dbt/clients/registry.py new file mode 100644 index 00000000000..0873cc509ad --- /dev/null +++ b/dbt/clients/registry.py @@ -0,0 +1,61 @@ +from functools import wraps +import six +import requests +from dbt.exceptions import RegistryException +from dbt.utils import memoized +import os + +if os.getenv('DBT_PACKAGE_HUB_URL'): + DEFAULT_REGISTRY_BASE_URL = os.getenv('DBT_PACKAGE_HUB_URL') +else: + DEFAULT_REGISTRY_BASE_URL = 'https://hub.getdbt.com/' + + +def _get_url(url, registry_base_url=None): + if registry_base_url is None: + registry_base_url = DEFAULT_REGISTRY_BASE_URL + + return '{}{}'.format(registry_base_url, url) + + +def _wrap_exceptions(fn): + @wraps(fn) + def wrapper(*args, **kwargs): + try: + return fn(*args, **kwargs) + except requests.exceptions.ConnectionError as e: + six.raise_from( + RegistryException('Unable to connect to registry hub'), e) + return wrapper + + +@_wrap_exceptions +def _get(path, registry_base_url=None): + url = _get_url(path, registry_base_url) + resp = requests.get(url) + resp.raise_for_status() + return resp.json() + + +def index(registry_base_url=None): + return _get('api/v1/index.json', registry_base_url) + + +index_cached = memoized(index) + + +def packages(registry_base_url=None): + return _get('api/v1/packages.json', registry_base_url) + + +def package(name, registry_base_url=None): + return _get('api/v1/{}.json'.format(name), registry_base_url) + + +def package_version(name, version, registry_base_url=None): + return _get('api/v1/{}/{}.json'.format(name, version), registry_base_url) + + +def get_available_versions(name): + response = package(name) + return list(response['versions']) diff --git a/dbt/clients/system.py b/dbt/clients/system.py index f94108f7e66..de4f4c7b335 100644 --- a/dbt/clients/system.py +++ b/dbt/clients/system.py @@ -5,8 +5,12 @@ import shutil import subprocess import sys +import tarfile +import requests +import stat import dbt.compat +import dbt.exceptions from dbt.logger import GLOBAL_LOGGER as logger @@ -92,6 +96,20 @@ def make_file(path, contents='', overwrite=False): return False +def make_symlink(source, link_path): + """ + Create a symlink at `link_path` referring to `source`. + """ + if not supports_symlinks(): + dbt.exceptions.system_error('create a symbolic link') + + return os.symlink(source, link_path) + + +def supports_symlinks(): + return getattr(os, "symlink", None) is not None + + def write_file(path, contents=''): make_directory(os.path.dirname(path)) dbt.compat.write_file(path, contents) @@ -99,12 +117,40 @@ def write_file(path, contents=''): return True +def _windows_rmdir_readonly(func, path, exc): + exception_val = exc[1] + if exception_val.errno == errno.EACCES: + os.chmod(path, stat.S_IWUSR) + func(path) + else: + raise + + def rmdir(path): """ - Make a file at `path` assuming that the directory it resides in already - exists. The file is saved with contents `contents` + Recursively deletes a directory. Includes an error handler to retry with + different permissions on Windows. Otherwise, removing directories (eg. + cloned via git) can cause rmtree to throw a PermissionError exception """ - return shutil.rmtree(path) + logger.debug("DEBUG** Window rmdir sys.platform: {}".format(sys.platform)) + if sys.platform == 'win32': + onerror = _windows_rmdir_readonly + else: + onerror = None + + return shutil.rmtree(path, onerror=onerror) + + +def remove_file(path): + return os.remove(path) + + +def path_exists(path): + return os.path.lexists(path) + + +def path_is_symlink(path): + return os.path.islink(path) def open_dir_cmd(): @@ -133,3 +179,27 @@ def run_cmd(cwd, cmd): logger.debug('STDERR: "{}"'.format(err)) return out, err + + +def download(url, path): + response = requests.get(url) + with open(path, 'wb') as handle: + for block in response.iter_content(1024*64): + handle.write(block) + + +def rename(from_path, to_path, force=False): + if os.path.exists(to_path) and force: + rmdir(to_path) + os.rename(from_path, to_path) + + +def untar_package(tar_path, dest_dir, rename_to=None): + tar_dir_name = None + with tarfile.open(tar_path, 'r') as tarball: + tarball.extractall(dest_dir) + tar_dir_name = os.path.commonprefix(tarball.getnames()) + if rename_to: + downloaded_path = os.path.join(dest_dir, tar_dir_name) + desired_path = os.path.join(dest_dir, rename_to) + dbt.clients.system.rename(downloaded_path, desired_path, force=True) diff --git a/dbt/context/common.py b/dbt/context/common.py index 64d62c04b45..a7de76f343e 100644 --- a/dbt/context/common.py +++ b/dbt/context/common.py @@ -1,5 +1,6 @@ import json import os +import pytz import voluptuous from dbt.adapters.factory import get_adapter diff --git a/dbt/deprecations.py b/dbt/deprecations.py index 23566b080af..e7c43fb6fe0 100644 --- a/dbt/deprecations.py +++ b/dbt/deprecations.py @@ -11,21 +11,19 @@ def show(self, *args, **kwargs): logger.info("* Deprecation Warning: {}\n".format(desc)) active_deprecations.add(self.name) -# Leaving this as an example. Make sure to add new ones to deprecations_list -# - Connor -# -# class DBTRunTargetDeprecation(DBTDeprecation): -# name = 'run-target' -# description = """profiles.yml configuration option 'run-target' is -# deprecated. Please use 'target' instead. The 'run-target' option will be -# removed (in favor of 'target') in DBT version 0.7.0""" + +class DBTRepositoriesDeprecation(DBTDeprecation): + name = "repositories" + description = """dbt_project.yml configuration option 'repositories' is + deprecated. Please use 'packages' instead. The 'repositories' option will + be removed in a later version of DBT.""" class SeedDropExistingDeprecation(DBTDeprecation): name = 'drop-existing' - description = """The --drop-existing argument has been deprecated. Please - use --full-refresh instead. The --drop-existing option will be removed in a - future version of dbt.""" + description = """The --drop-existing argument to `dbt seed` has been + deprecated. Please use --full-refresh instead. The --drop-existing option + will be removed in a future version of dbt.""" def warn(name, *args, **kwargs): @@ -44,6 +42,7 @@ def warn(name, *args, **kwargs): active_deprecations = set() deprecations_list = [ + DBTRepositoriesDeprecation(), SeedDropExistingDeprecation() ] diff --git a/dbt/exceptions.py b/dbt/exceptions.py index a827967f237..c5076f5132e 100644 --- a/dbt/exceptions.py +++ b/dbt/exceptions.py @@ -105,6 +105,23 @@ class ValidationException(RuntimeException): pass +class ParsingException(Exception): + pass + + +class DependencyException(Exception): + pass + + +class SemverException(Exception): + def __init__(self, msg=None): + self.msg = msg + + +class VersionsNotCompatibleException(SemverException): + pass + + class NotImplementedException(Exception): pass @@ -124,6 +141,10 @@ def raise_database_error(msg, node=None): raise DatabaseException(msg, node) +def raise_dependency_error(msg): + raise DependencyException(msg) + + def ref_invalid_args(model, args): raise_compiler_error( "ref() takes at most two arguments ({} given)".format(len(args)), @@ -233,7 +254,33 @@ def missing_relation(relation_name, model=None): model) +def package_not_found(package_name): + raise_dependency_error( + "Package {} was not found in the package index".format(package_name)) + + +def package_version_not_found(package_name, version_range, available_versions): + base_msg = ('Could not find a matching version for package {}\n' + ' Requested range: {}\n' + ' Available versions: {}') + raise_dependency_error(base_msg.format(package_name, + version_range, + available_versions)) + + def invalid_materialization_argument(name, argument): raise_compiler_error( "materialization '{}' received unknown argument '{}'." .format(name, argument)) + + +def system_error(operation_name): + raise_compiler_error( + "dbt encountered an error when attempting to {}. " + "If this error persists, please create an issue at: \n\n" + "https://github.com/fishtown-analytics/dbt" + .format(operation_name)) + + +class RegistryException(Exception): + pass diff --git a/dbt/semver.py b/dbt/semver.py new file mode 100644 index 00000000000..006513fd315 --- /dev/null +++ b/dbt/semver.py @@ -0,0 +1,398 @@ +import re +import logging + +from dbt.exceptions import VersionsNotCompatibleException +import dbt.utils + +logger = logging.getLogger(__name__) + +_MATCHERS = r"(?P\>=|\>|\<|\<=|=)?" +_NUM_NO_LEADING_ZEROS = r"(0|[1-9][0-9]*)" +_ALPHA = r"[0-9A-Za-z-]*" +_ALPHA_NO_LEADING_ZEROS = r"(0|[1-9A-Za-z-][0-9A-Za-z-]*)" + +_BASE_VERSION_REGEX = r""" +(?P{num_no_leading_zeros})\. +(?P{num_no_leading_zeros})\. +(?P{num_no_leading_zeros}) +""".format(num_no_leading_zeros=_NUM_NO_LEADING_ZEROS) + +_VERSION_EXTRA_REGEX = r""" +(\- + (?P + {alpha_no_leading_zeros}(\.{alpha_no_leading_zeros})*))? +(\+ + (?P + {alpha}(\.{alpha})*))? +""".format( + alpha_no_leading_zeros=_ALPHA_NO_LEADING_ZEROS, + alpha=_ALPHA) + +_VERSION_REGEX = re.compile(r""" +^ +{matchers} +{base_version_regex} +{version_extra_regex} +$ +""".format( + matchers=_MATCHERS, + base_version_regex=_BASE_VERSION_REGEX, + version_extra_regex=_VERSION_EXTRA_REGEX), + re.VERBOSE) + + +class Matchers: + GREATER_THAN = '>' + GREATER_THAN_OR_EQUAL = '>=' + LESS_THAN = '<' + LESS_THAN_OR_EQUAL = '<=' + EXACT = '=' + + +class VersionRange(dbt.utils.AttrDict): + + def _try_combine_exact(self, a, b): + if a.compare(b) == 0: + return a + else: + raise VersionsNotCompatibleException() + + def _try_combine_lower_bound_with_exact(self, lower, exact): + comparison = lower.compare(exact) + + if (comparison < 0 or + (comparison == 0 and + lower.matcher == Matchers.GREATER_THAN_OR_EQUAL)): + return exact + + raise VersionsNotCompatibleException() + + def _try_combine_lower_bound(self, a, b): + if b.is_unbounded: + return a + elif a.is_unbounded: + return b + + if not (a.is_exact or b.is_exact): + comparison = (a.compare(b) < 0) + + if comparison: + return b + else: + return a + + elif a.is_exact: + return self._try_combine_lower_bound_with_exact(b, a) + + elif b.is_exact: + return self._try_combine_lower_bound_with_exact(a, b) + + def _try_combine_upper_bound_with_exact(self, upper, exact): + comparison = upper.compare(exact) + + if (comparison > 0 or + (comparison == 0 and + upper.matcher == Matchers.LESS_THAN_OR_EQUAL)): + return exact + + raise VersionsNotCompatibleException() + + def _try_combine_upper_bound(self, a, b): + if b.is_unbounded: + return a + elif a.is_unbounded: + return b + + if not (a.is_exact or b.is_exact): + comparison = (a.compare(b) > 0) + + if comparison: + return b + else: + return a + + elif a.is_exact: + return self._try_combine_upper_bound_with_exact(b, a) + + elif b.is_exact: + return self._try_combine_upper_bound_with_exact(a, b) + + def reduce(self, other): + start = None + + if(self.start.is_exact and other.start.is_exact): + start = end = self._try_combine_exact(self.start, other.start) + + else: + start = self._try_combine_lower_bound(self.start, other.start) + end = self._try_combine_upper_bound(self.end, other.end) + + if start.compare(end) > 0: + raise VersionsNotCompatibleException() + + return VersionRange(start=start, end=end) + + def __str__(self): + result = [] + + if self.start.is_unbounded and self.end.is_unbounded: + return 'ANY' + + if not self.start.is_unbounded: + result.append(self.start.to_version_string()) + + if not self.end.is_unbounded: + result.append(self.end.to_version_string()) + + return ', '.join(result) + + def to_version_string_pair(self): + to_return = [] + + if not self.start.is_unbounded: + to_return.append(self.start.to_version_string()) + + if not self.end.is_unbounded: + to_return.append(self.end.to_version_string()) + + return to_return + + +class VersionSpecifier(dbt.utils.AttrDict): + + def __init__(self, *args, **kwargs): + super(VersionSpecifier, self).__init__(*args, **kwargs) + + if self.matcher is None: + self.matcher = Matchers.EXACT + + def to_version_string(self, skip_matcher=False): + prerelease = '' + build = '' + matcher = '' + + if self.prerelease: + prerelease = '-' + self.prerelease + + if self.build: + build = '+' + self.build + + if not skip_matcher: + matcher = self.matcher + return '{}{}.{}.{}{}{}'.format( + matcher, + self.major, + self.minor, + self.patch, + prerelease, + build) + + @classmethod + def from_version_string(cls, version_string): + match = _VERSION_REGEX.match(version_string) + + if not match: + raise dbt.exceptions.SemverException( + 'Could not parse version "{}"'.format(version_string)) + + return VersionSpecifier(match.groupdict()) + + def __str__(self): + return self.to_version_string() + + def to_range(self): + range_start = UnboundedVersionSpecifier() + range_end = UnboundedVersionSpecifier() + + if self.matcher == Matchers.EXACT: + range_start = self + range_end = self + + elif self.matcher in [Matchers.GREATER_THAN, + Matchers.GREATER_THAN_OR_EQUAL]: + range_start = self + + elif self.matcher in [Matchers.LESS_THAN, + Matchers.LESS_THAN_OR_EQUAL]: + range_end = self + + return VersionRange( + start=range_start, + end=range_end) + + def compare(self, other): + if self.is_unbounded or other.is_unbounded: + return 0 + + for key in ['major', 'minor', 'patch']: + comparison = int(self[key]) - int(other[key]) + + if comparison > 0: + return 1 + elif comparison < 0: + return -1 + + equal = ((self.matcher == Matchers.GREATER_THAN_OR_EQUAL and + other.matcher == Matchers.LESS_THAN_OR_EQUAL) or + (self.matcher == Matchers.LESS_THAN_OR_EQUAL and + other.matcher == Matchers.GREATER_THAN_OR_EQUAL)) + if equal: + return 0 + + lt = ((self.matcher == Matchers.LESS_THAN and + other.matcher == Matchers.LESS_THAN_OR_EQUAL) or + (other.matcher == Matchers.GREATER_THAN and + self.matcher == Matchers.GREATER_THAN_OR_EQUAL) or + (self.is_upper_bound and other.is_lower_bound)) + if lt: + return -1 + + gt = ((other.matcher == Matchers.LESS_THAN and + self.matcher == Matchers.LESS_THAN_OR_EQUAL) or + (self.matcher == Matchers.GREATER_THAN and + other.matcher == Matchers.GREATER_THAN_OR_EQUAL) or + (self.is_lower_bound and other.is_upper_bound)) + if gt: + return 1 + + return 0 + + def __lt__(self, other): + return self.compare(other) == -1 + + def __gt__(self, other): + return self.compare(other) == 1 + + def __eq___(self, other): + return self.compare(other) == 0 + + def __cmp___(self, other): + return self.compare(other) + + @property + def is_unbounded(self): + return False + + @property + def is_lower_bound(self): + return self.matcher in [Matchers.GREATER_THAN, + Matchers.GREATER_THAN_OR_EQUAL] + + @property + def is_upper_bound(self): + return self.matcher in [Matchers.LESS_THAN, + Matchers.LESS_THAN_OR_EQUAL] + + @property + def is_exact(self): + return self.matcher == Matchers.EXACT + + +class UnboundedVersionSpecifier(VersionSpecifier): + + def __init__(self, *args, **kwargs): + super(dbt.utils.AttrDict, self).__init__(*args, **kwargs) + + def __str__(self): + return "*" + + @property + def is_unbounded(self): + return True + + @property + def is_lower_bound(self): + return False + + @property + def is_upper_bound(self): + return False + + @property + def is_exact(self): + return False + + +def reduce_versions(*args): + version_specifiers = [] + + for version in args: + if isinstance(version, UnboundedVersionSpecifier) or version is None: + continue + + elif isinstance(version, VersionSpecifier): + version_specifiers.append(version) + + elif isinstance(version, VersionRange): + if not isinstance(version.start, UnboundedVersionSpecifier): + version_specifiers.append(version.start) + + if not isinstance(version.end, UnboundedVersionSpecifier): + version_specifiers.append(version.end) + + else: + version_specifiers.append( + VersionSpecifier.from_version_string(version)) + + for version_specifier in version_specifiers: + if not isinstance(version_specifier, VersionSpecifier): + raise Exception(version_specifier) + + if not version_specifiers: + return VersionRange(start=UnboundedVersionSpecifier(), + end=UnboundedVersionSpecifier()) + + try: + to_return = version_specifiers.pop().to_range() + + for version_specifier in version_specifiers: + to_return = to_return.reduce(version_specifier.to_range()) + except VersionsNotCompatibleException as e: + raise VersionsNotCompatibleException( + 'Could not find a satisfactory version from options: {}' + .format([str(a) for a in args])) + + return to_return + + +def versions_compatible(*args): + if len(args) == 1: + return True + + try: + reduce_versions(*args) + return True + except VersionsNotCompatibleException as e: + return False + + +def find_possible_versions(requested_range, available_versions): + possible_versions = [] + + for version_string in available_versions: + version = VersionSpecifier.from_version_string(version_string) + + if(versions_compatible(version, + requested_range.start, + requested_range.end)): + possible_versions.append(version) + + sorted_versions = sorted(possible_versions, reverse=True) + return [v.to_version_string(skip_matcher=True) for v in sorted_versions] + + +def resolve_to_specific_version(requested_range, available_versions): + max_version = None + max_version_string = None + + for version_string in available_versions: + version = VersionSpecifier.from_version_string(version_string) + + if(versions_compatible(version, + requested_range.start, + requested_range.end) and + (max_version is None or max_version.compare(version) < 0)): + max_version = version + max_version_string = version_string + + return max_version_string diff --git a/dbt/task/deps.py b/dbt/task/deps.py index 050f8874fbb..a0c4f2ca27d 100644 --- a/dbt/task/deps.py +++ b/dbt/task/deps.py @@ -1,132 +1,386 @@ import os -import errno -import re +import shutil +import hashlib +import tempfile +import six +import dbt.deprecations import dbt.clients.git import dbt.clients.system -import dbt.project as project +import dbt.clients.registry as registry +from dbt.clients.yaml_helper import load_yaml_text from dbt.compat import basestring from dbt.logger import GLOBAL_LOGGER as logger +from dbt.semver import VersionSpecifier, UnboundedVersionSpecifier +from dbt.utils import AttrDict from dbt.task.base_task import BaseTask +DOWNLOADS_PATH = os.path.join(tempfile.gettempdir(), "dbt-downloads") -def folder_from_git_remote(remote_spec): - start = remote_spec.rfind('/') + 1 - end = len(remote_spec) - (4 if remote_spec.endswith('.git') else 0) - return remote_spec[start:end] +class Package(object): + def __init__(self, name): + self.name = name + self._cached_metadata = None -class DepsTask(BaseTask): - def __pull_repo(self, repo, branch=None): - modules_path = self.project['modules-path'] + def __str__(self): + version = getattr(self, 'version', None) + if not version: + return self.name + version_str = version[0] \ + if len(version) == 1 else '' + return '{}@{}'.format(self.name, version_str) - out, err = dbt.clients.git.clone(repo, modules_path) + @classmethod + def version_to_list(cls, version): + if version is None: + return [] + if not isinstance(version, (list, basestring)): + dbt.exceptions.raise_dependency_error( + 'version must be list or string, got {}' + .format(type(version))) + if not isinstance(version, list): + version = [version] + return version - exists = re.match("fatal: destination path '(.+)' already exists", - err.decode('utf-8')) + def _resolve_version(self): + pass - folder = None - start_sha = None + def resolve_version(self): + try: + self._resolve_version() + except dbt.exceptions.VersionsNotCompatibleException as e: + new_msg = ('Version error for package {}: {}' + .format(self.name, e)) + six.raise_from(dbt.exceptions.DependencyException(new_msg), e) - if exists: - folder = exists.group(1) - logger.info('Updating existing dependency {}.'.format(folder)) - else: - matches = re.match("Cloning into '(.+)'", err.decode('utf-8')) - folder = matches.group(1) - logger.info('Pulling new dependency {}.'.format(folder)) - - dependency_path = os.path.join(modules_path, folder) - start_sha = dbt.clients.git.get_current_sha(dependency_path) - dbt.clients.git.checkout(dependency_path, repo, branch) - end_sha = dbt.clients.git.get_current_sha(dependency_path) - - if exists: - if start_sha == end_sha: - logger.info(' Already at {}, nothing to do.'.format( - start_sha[:7])) + def version_name(self): + raise NotImplementedError() + + def nice_version_name(self): + raise NotImplementedError() + + def _fetch_metadata(self, project): + raise NotImplementedError() + + def fetch_metadata(self, project): + if not self._cached_metadata: + self._cached_metadata = self._fetch_metadata(project) + return self._cached_metadata + + def get_project_name(self, project): + metadata = self.fetch_metadata(project) + return metadata["name"] + + def get_installation_path(self, project): + dest_dirname = self.get_project_name(project) + return os.path.join(project['modules-path'], dest_dirname) + + +class RegistryPackage(Package): + def __init__(self, package, version): + super(RegistryPackage, self).__init__(package) + self.package = package + self._version = self._sanitize_version(version) + + @classmethod + def _sanitize_version(cls, version): + version = [v if isinstance(v, VersionSpecifier) + else VersionSpecifier.from_version_string(v) + for v in cls.version_to_list(version)] + return version or [UnboundedVersionSpecifier()] + + @property + def version(self): + return self._version + + @version.setter + def version(self, version): + self._version = self._sanitize_version(version) + + def version_name(self): + self._check_version_pinned() + version_string = self.version[0].to_version_string(skip_matcher=True) + return version_string + + def nice_version_name(self): + return "version {}".format(self.version_name()) + + def incorporate(self, other): + return RegistryPackage(self.package, self.version + other.version) + + def _check_in_index(self): + index = registry.index_cached() + if self.package not in index: + dbt.exceptions.package_not_found(self.package) + + def _resolve_version(self): + self._check_in_index() + range_ = dbt.semver.reduce_versions(*self.version) + available = registry.get_available_versions(self.package) + # for now, pick a version and then recurse. later on, + # we'll probably want to traverse multiple options + # so we can match packages. not going to make a difference + # right now. + target = dbt.semver.resolve_to_specific_version(range_, available) + if not target: + dbt.exceptions.package_version_not_found( + self.package, range_, available) + self.version = target + + def _check_version_pinned(self): + if len(self.version) != 1: + dbt.exceptions.raise_dependency_error( + 'Cannot fetch metadata until the version is pinned.') + + def _fetch_metadata(self, project): + version_string = self.version_name() + return registry.package_version(self.package, version_string) + + def install(self, project): + version_string = self.version_name() + metadata = self.fetch_metadata(project) + + tar_name = '{}.{}.tar.gz'.format(self.package, version_string) + tar_path = os.path.realpath(os.path.join(DOWNLOADS_PATH, tar_name)) + dbt.clients.system.make_directory(os.path.dirname(tar_path)) + + download_url = metadata.get('downloads').get('tarball') + dbt.clients.system.download(download_url, tar_path) + deps_path = project['modules-path'] + package_name = self.get_project_name(project) + dbt.clients.system.untar_package(tar_path, deps_path, package_name) + + +class GitPackage(Package): + def __init__(self, git, version): + super(GitPackage, self).__init__(git) + self.git = git + self._checkout_name = hashlib.md5(six.b(git)).hexdigest() + self._version = self._sanitize_version(version) + + @classmethod + def _sanitize_version(cls, version): + return cls.version_to_list(version) or ['master'] + + @property + def version(self): + return self._version + + @version.setter + def version(self, version): + self._version = self._sanitize_version(version) + + def version_name(self): + return self._version[0] + + def nice_version_name(self): + return "revision {}".format(self.version_name()) + + def incorporate(self, other): + return GitPackage(self.git, self.version + other.version) + + def _resolve_version(self): + requested = set(self.version) + if len(requested) != 1: + dbt.exceptions.raise_dependency_error( + 'git dependencies should contain exactly one version. ' + '{} contains: {}'.format(self.git, requested)) + self.version = requested.pop() + + def _checkout(self, project): + """Performs a shallow clone of the repository into the downloads + directory. This function can be called repeatedly. If the project has + already been checked out at this version, it will be a no-op. Returns + the path to the checked out directory.""" + if len(self.version) != 1: + dbt.exceptions.raise_dependency_error( + 'Cannot checkout repository until the version is pinned.') + dir_ = dbt.clients.git.clone_and_checkout( + self.git, DOWNLOADS_PATH, branch=self.version[0], + dirname=self._checkout_name) + return os.path.join(DOWNLOADS_PATH, dir_) + + def _fetch_metadata(self, project): + path = self._checkout(project) + with open(os.path.join(path, 'dbt_project.yml')) as f: + return load_yaml_text(f.read()) + + def install(self, project): + dest_path = self.get_installation_path(project) + if os.path.exists(dest_path): + dbt.clients.system.rmdir(dest_path) + shutil.move(self._checkout(project), dest_path) + + +class LocalPackage(Package): + def __init__(self, local): + super(LocalPackage, self).__init__(local) + self.local = local + + def incorporate(self, _): + return LocalPackage(self.local) + + def version_name(self): + return ''.format(self.local) + + def nice_version_name(self): + return self.version_name() + + def _fetch_metadata(self, project): + with open(os.path.join(self.local, 'dbt_project.yml')) as f: + return load_yaml_text(f.read()) + + def install(self, project): + dest_path = self.get_installation_path(project) + + can_create_symlink = dbt.clients.system.supports_symlinks() + + if dbt.clients.system.path_exists(dest_path): + if not dbt.clients.system.path_is_symlink(dest_path): + dbt.clients.system.rmdir(dest_path) else: - logger.info(' Updated checkout from {} to {}.'.format( - start_sha[:7], end_sha[:7])) + dbt.clients.system.remove_file(dest_path) + + if can_create_symlink: + logger.debug(' Creating symlink to local dependency.') + dbt.clients.system.make_symlink(self.local, dest_path) + else: - logger.info(' Checked out at {}.'.format(end_sha[:7])) + logger.debug(' Symlinks are not available on this ' + 'OS, copying dependency.') + shutil.copytree(self.local, dest_path) - return folder - def __split_at_branch(self, repo_spec): - parts = repo_spec.split("@") - error = RuntimeError( - "Invalid dep specified: '{}' -- not a repo we can clone".format( - repo_spec - ) - ) +def _parse_package(dict_): + only_1_keys = ['package', 'git', 'local'] + specified = [k for k in only_1_keys if dict_.get(k)] + if len(specified) > 1: + dbt.exceptions.raise_dependency_error( + 'Packages should not contain more than one of {}; ' + 'yours has {} of them - {}' + .format(only_1_keys, len(specified), specified)) + if dict_.get('package'): + return RegistryPackage(dict_['package'], dict_.get('version')) + if dict_.get('git'): + if dict_.get('version'): + msg = ("Keyword 'version' specified for git package {}.\nDid " + "you mean 'revision'?".format(dict_.get('git'))) + dbt.exceptions.raise_dependency_error(msg) + return GitPackage(dict_['git'], dict_.get('revision')) + if dict_.get('local'): + return LocalPackage(dict_['local']) + dbt.exceptions.raise_dependency_error( + 'Malformed package definition. Must contain package, git, or local.') + - repo = None - if repo_spec.startswith("git@"): - if len(parts) == 1: - raise error - if len(parts) == 2: - repo, branch = repo_spec, None - elif len(parts) == 3: - repo, branch = "@".join(parts[:2]), parts[2] +class PackageListing(AttrDict): + + def incorporate(self, package): + if not isinstance(package, Package): + package = _parse_package(package) + if package.name not in self: + self[package.name] = package else: - if len(parts) == 1: - repo, branch = parts[0], None - elif len(parts) == 2: - repo, branch = parts + self[package.name] = self[package.name].incorporate(package) + + @classmethod + def create(cls, parsed_yaml): + to_return = cls({}) + if not isinstance(parsed_yaml, list): + dbt.exceptions.raise_dependency_error( + 'Package definitions must be a list, got: {}' + .format(type(parsed_yaml))) + for package in parsed_yaml: + to_return.incorporate(package) + return to_return + + def incorporate_from_yaml(self, parsed_yaml): + listing = self.create(parsed_yaml) + for _, package in listing.items(): + self.incorporate(package) + - if repo is None: +def _split_at_branch(repo_spec): + parts = repo_spec.split('@') + error = RuntimeError( + "Invalid dep specified: '{}' -- not a repo we can clone".format( + repo_spec + ) + ) + repo = None + if repo_spec.startswith('git@'): + if len(parts) == 1: raise error + if len(parts) == 2: + repo, branch = repo_spec, None + elif len(parts) == 3: + repo, branch = '@'.join(parts[:2]), parts[2] + else: + if len(parts) == 1: + repo, branch = parts[0], None + elif len(parts) == 2: + repo, branch = parts + if repo is None: + raise error + return repo, branch + + +def _convert_repo(repo_spec): + repo, branch = _split_at_branch(repo_spec) + return { + 'git': repo, + 'revision': branch, + } + - return repo, branch - - def __pull_deps_recursive(self, repos, processed_repos=None, i=0): - if processed_repos is None: - processed_repos = set() - for repo_string in repos: - repo, branch = self.__split_at_branch(repo_string) - repo_folder = folder_from_git_remote(repo) - - try: - if repo_folder in processed_repos: - logger.info( - "skipping already processed dependency {}" - .format(repo_folder) - ) - else: - dep_folder = self.__pull_repo(repo, branch) - dep_project = project.read_project( - os.path.join(self.project['modules-path'], - dep_folder, - 'dbt_project.yml'), - self.project.profiles_dir, - profile_to_load=self.project.profile_to_load - ) - processed_repos.add(dep_folder) - self.__pull_deps_recursive( - dep_project['repositories'], processed_repos, i+1 - ) - except IOError as e: - if e.errno == errno.ENOENT: - error_string = basestring(e) - - if 'dbt_project.yml' in error_string: - error_string = ("'{}' is not a valid dbt project - " - "dbt_project.yml not found" - .format(repo)) - - elif 'git' in error_string: - error_string = ("Git CLI is a dependency of dbt, but " - "it is not installed!") - - raise dbt.exceptions.RuntimeException(error_string) - - else: - raise e +def _read_packages(project_yaml): + packages = project_yaml.get('packages', []) + repos = project_yaml.get('repositories', []) + if repos: + dbt.deprecations.warn('repositories') + packages += [_convert_repo(r) for r in repos] + return packages + + +class DepsTask(BaseTask): + def _check_for_duplicate_project_names(self, final_deps): + seen = set() + for _, package in final_deps.items(): + project_name = package.get_project_name(self.project) + if project_name in seen: + dbt.exceptions.raise_dependency_error( + 'Found duplicate project {}. This occurs when a dependency' + ' has the same project name as some other dependency.' + .format(project_name)) + seen.add(project_name) def run(self): dbt.clients.system.make_directory(self.project['modules-path']) + dbt.clients.system.make_directory(DOWNLOADS_PATH) + + packages = _read_packages(self.project) + if not packages: + logger.info('Warning: No packages found in dbt_project.yml') + return + + pending_deps = PackageListing.create(packages) + final_deps = PackageListing.create([]) + + while pending_deps: + sub_deps = PackageListing.create([]) + for name, package in pending_deps.items(): + final_deps.incorporate(package) + final_deps[name].resolve_version() + target_metadata = final_deps[name].fetch_metadata(self.project) + sub_deps.incorporate_from_yaml(_read_packages(target_metadata)) + pending_deps = sub_deps + + self._check_for_duplicate_project_names(final_deps) - self.__pull_deps_recursive(self.project['repositories']) + for _, package in final_deps.items(): + logger.info('Installing %s', package) + package.install(self.project) + logger.info(' Installed from %s\n', package.nice_version_name()) diff --git a/dbt/utils.py b/dbt/utils.py index 14cbf97deae..d0a591ec4b4 100644 --- a/dbt/utils.py +++ b/dbt/utils.py @@ -1,6 +1,8 @@ import os import hashlib import itertools +import collections +import functools import dbt.exceptions import dbt.flags @@ -240,10 +242,10 @@ def merge(*args): if len(args) == 1: return args[0] - l = list(args) - last = l.pop(len(l)-1) + lst = list(args) + last = lst.pop(len(lst)-1) - return _merge(merge(*l), last) + return _merge(merge(*lst), last) def _merge(a, b): @@ -264,10 +266,10 @@ def deep_merge(*args): if len(args) == 1: return args[0] - l = list(args) - last = l.pop(len(l)-1) + lst = list(args) + last = lst.pop(len(lst)-1) - return _deep_merge(deep_merge(*l), last) + return _deep_merge(deep_merge(*lst), last) def _deep_merge(destination, source): @@ -363,6 +365,36 @@ def flatten_nodes(dep_list): return list(itertools.chain.from_iterable(dep_list)) +class memoized(object): + '''Decorator. Caches a function's return value each time it is called. If + called later with the same arguments, the cached value is returned (not + reevaluated). + + Taken from https://wiki.python.org/moin/PythonDecoratorLibrary#Memoize''' + def __init__(self, func): + self.func = func + self.cache = {} + + def __call__(self, *args): + if not isinstance(args, collections.Hashable): + # uncacheable. a list, for instance. + # better to not cache than blow up. + return self.func(*args) + if args in self.cache: + return self.cache[args] + value = self.func(*args) + self.cache[args] = value + return value + + def __repr__(self): + '''Return the function's docstring.''' + return self.func.__doc__ + + def __get__(self, obj, objtype): + '''Support instance methods.''' + return functools.partial(self.__call__, obj) + + def max_digits(values): """Given a series of decimal.Decimal values, find the maximum number of digits (on both sides of the decimal point) used by the diff --git a/dbt/version.py b/dbt/version.py index 2d6bb2c3f33..b4719560fde 100644 --- a/dbt/version.py +++ b/dbt/version.py @@ -31,7 +31,7 @@ def get_latest_version(): try: f = urlopen(REMOTE_VERSION_FILE) contents = f.read() - except: + except Exception: contents = '' if hasattr(contents, 'decode'): contents = contents.decode('utf-8') diff --git a/test/unit/test_semver.py b/test/unit/test_semver.py new file mode 100644 index 00000000000..05cde5b450f --- /dev/null +++ b/test/unit/test_semver.py @@ -0,0 +1,136 @@ +import unittest +import itertools + +from dbt.exceptions import VersionsNotCompatibleException +from dbt.semver import VersionSpecifier, UnboundedVersionSpecifier, \ + VersionRange, reduce_versions, versions_compatible, \ + resolve_to_specific_version + + +def create_range(start_version_string, end_version_string): + start = UnboundedVersionSpecifier() + end = UnboundedVersionSpecifier() + + if start_version_string is not None: + start = VersionSpecifier.from_version_string(start_version_string) + + if end_version_string is not None: + end = VersionSpecifier.from_version_string(end_version_string) + + return VersionRange(start=start, end=end) + + +class TestSemver(unittest.TestCase): + + def assertVersionSetResult(self, inputs, output_range): + expected = create_range(*output_range) + + for permutation in itertools.permutations(inputs): + self.assertDictEqual( + reduce_versions(*permutation), + expected) + + def assertInvalidVersionSet(self, inputs): + for permutation in itertools.permutations(inputs): + with self.assertRaises(VersionsNotCompatibleException): + reduce_versions(*permutation) + + def test__versions_compatible(self): + self.assertTrue( + versions_compatible('0.0.1', '0.0.1')) + self.assertFalse( + versions_compatible('0.0.1', '0.0.2')) + self.assertTrue( + versions_compatible('>0.0.1', '0.0.2')) + + def test__reduce_versions(self): + self.assertVersionSetResult( + ['0.0.1', '0.0.1'], + ['=0.0.1', '=0.0.1']) + + self.assertVersionSetResult( + ['0.0.1'], + ['=0.0.1', '=0.0.1']) + + self.assertVersionSetResult( + ['>0.0.1'], + ['>0.0.1', None]) + + self.assertVersionSetResult( + ['<0.0.1'], + [None, '<0.0.1']) + + self.assertVersionSetResult( + ['>0.0.1', '0.0.2'], + ['=0.0.2', '=0.0.2']) + + self.assertVersionSetResult( + ['0.0.2', '>=0.0.2'], + ['=0.0.2', '=0.0.2']) + + self.assertVersionSetResult( + ['>0.0.1', '>0.0.2', '>0.0.3'], + ['>0.0.3', None]) + + self.assertVersionSetResult( + ['>0.0.1', '<0.0.3'], + ['>0.0.1', '<0.0.3']) + + self.assertVersionSetResult( + ['>0.0.1', '0.0.2', '<0.0.3'], + ['=0.0.2', '=0.0.2']) + + self.assertVersionSetResult( + ['>0.0.1', '>=0.0.1', '<0.0.3'], + ['>0.0.1', '<0.0.3']) + + self.assertVersionSetResult( + ['>0.0.1', '<0.0.3', '<=0.0.3'], + ['>0.0.1', '<0.0.3']) + + self.assertVersionSetResult( + ['>0.0.1', '>0.0.2', '<0.0.3', '<0.0.4'], + ['>0.0.2', '<0.0.3']) + + self.assertVersionSetResult( + ['<=0.0.3', '>=0.0.3'], + ['>=0.0.3', '<=0.0.3']) + + self.assertInvalidVersionSet(['>0.0.2', '0.0.1']) + self.assertInvalidVersionSet(['>0.0.2', '0.0.2']) + self.assertInvalidVersionSet(['<0.0.2', '0.0.2']) + self.assertInvalidVersionSet(['<0.0.2', '>0.0.3']) + self.assertInvalidVersionSet(['<=0.0.3', '>0.0.3']) + self.assertInvalidVersionSet(['<0.0.3', '>=0.0.3']) + self.assertInvalidVersionSet(['<0.0.3', '>0.0.3']) + + def test__resolve_to_specific_version(self): + self.assertEqual( + resolve_to_specific_version( + create_range('>0.0.1', None), + ['0.0.1', '0.0.2']), + '0.0.2') + + self.assertEqual( + resolve_to_specific_version( + create_range('>=0.0.2', None), + ['0.0.1', '0.0.2']), + '0.0.2') + + self.assertEqual( + resolve_to_specific_version( + create_range('>=0.0.3', None), + ['0.0.1', '0.0.2']), + None) + + self.assertEqual( + resolve_to_specific_version( + create_range('>=0.0.3', '<0.0.5'), + ['0.0.3', '0.0.4', '0.0.5']), + '0.0.4') + + self.assertEqual( + resolve_to_specific_version( + create_range(None, '<=0.0.5'), + ['0.0.3', '0.1.4', '0.0.5']), + '0.0.5')