Skip to content

Commit

Permalink
make dependencies more compact (#342)
Browse files Browse the repository at this point in the history
  • Loading branch information
cmcarthur authored Mar 20, 2017
1 parent 321600b commit 4f79c28
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 44 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
- Refactor: factor out jinja interactions ([#309](https://github.com/fishtown-analytics/dbt/pull/309))
- Speedup: detect cycles at the end of compilation ([#307](https://github.com/fishtown-analytics/dbt/pull/307))
- Speedup: write graph file with gpickle instead of yaml ([#306](https://github.com/fishtown-analytics/dbt/pull/306))
- Clone dependencies with `--depth 1` to make them more compact ([#277](https://github.com/fishtown-analytics/dbt/issues/277), [#342](https://github.com/fishtown-analytics/dbt/pull/342))

## dbt 0.7.1 (February 28, 2017)

Expand Down
41 changes: 41 additions & 0 deletions dbt/clients/git.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import subprocess

from dbt.logger import GLOBAL_LOGGER as logger


def log_cmd(cmd):
    """Write a debug-level log line showing the command about to be run.

    `cmd` is a sequence of string arguments; they are joined with single
    spaces purely for display.
    """
    rendered = ' '.join(cmd)
    logger.debug('Executing "{}"'.format(rendered))


def run_cmd(cwd, cmd):
    """Run `cmd` with `cwd` as the working directory and capture its output.

    The command is logged first via `log_cmd`. Returns the
    `(stdout, stderr)` pair produced by `Popen.communicate()`. The exit
    status of the process is not inspected.
    """
    log_cmd(cmd)

    popen_kwargs = {
        'cwd': cwd,
        'stdout': subprocess.PIPE,
        'stderr': subprocess.PIPE,
    }
    child = subprocess.Popen(cmd, **popen_kwargs)

    captured = child.communicate()
    return captured


def clone(repo, cwd):
    """Shallow-clone `repo` (history depth 1) with `cwd` as working directory.

    Returns whatever `run_cmd` returns for the `git clone` invocation.
    """
    clone_args = ['git', 'clone', '--depth', '1', repo]
    return run_cmd(cwd, clone_args)


def checkout(cwd, branch=None):
    """Point the repository at `cwd` at the tip of `origin/<branch>`.

    `branch` defaults to 'master' when None. Three git commands are run in
    order: restrict the tracked remote branches to the target, fetch it
    with depth 1, then hard-reset to the remote ref. Their output is
    discarded and failures are not detected here.
    """
    target = 'master' if branch is None else branch
    tracking_ref = 'origin/{}'.format(target)

    logger.info(' Checking out branch {}.'.format(target))

    git_steps = (
        ['git', 'remote', 'set-branches', 'origin', target],
        ['git', 'fetch', '--depth', '1', 'origin', target],
        ['git', 'reset', '--hard', tracking_ref],
    )
    for step in git_steps:
        run_cmd(cwd, step)


def get_current_sha(cwd):
    """Return the commit SHA that HEAD resolves to in the repository at `cwd`.

    Runs `git rev-parse HEAD` through `run_cmd` and decodes its stdout as
    UTF-8. If git writes nothing to stdout (for example because the
    command failed), an empty string is returned.
    """
    out, _ = run_cmd(cwd, ['git', 'rev-parse', 'HEAD'])

    # git terminates the SHA with a newline; strip it so callers receive
    # only the hash and can compare or embed it without stray whitespace.
    return out.decode('utf-8').strip()
69 changes: 25 additions & 44 deletions dbt/task/deps.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import os
import errno
import re
import yaml
import pprint
import subprocess

import dbt.clients.git
import dbt.project as project

from dbt.logger import GLOBAL_LOGGER as logger
Expand All @@ -20,57 +19,39 @@ def __init__(self, args, project):
self.args = args
self.project = project

def __checkout_branch(self, branch, full_path):
    """Run `git checkout <branch>` with `full_path` as working directory."""
    logger.info(" checking out branch {}".format(branch))

    checkout_cmd = ['git', 'checkout', branch]
    child = subprocess.Popen(
        checkout_cmd,
        cwd=full_path,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)

    # Wait for git to finish; the captured output is not used.
    child.communicate()

def __pull_repo(self, repo, branch=None):
# Clone `repo` into the project's modules path (or update the checkout that
# is already there), check out `branch`, and return the dependency's folder
# name as reported by git.
# NOTE(review): this span is a rendered diff hunk without +/- markers, so
# lines from the previous implementation (direct subprocess.Popen calls and
# self.__checkout_branch) appear interleaved with the lines of the new
# implementation (dbt.clients.git helpers). Confirm against the real file
# which lines are live before editing the logic.
proc = subprocess.Popen(
['git', 'clone', repo],
cwd=self.project['modules-path'],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
modules_path = self.project['modules-path']

out, err = proc.communicate()
out, err = dbt.clients.git.clone(repo, modules_path)

# git's stderr is matched against the "destination path already exists"
# message; a match means the dependency was cloned previously and captures
# its folder name.
exists = re.match(
"fatal: destination path '(.+)' already exists",
err.decode('utf-8')
)
exists = re.match("fatal: destination path '(.+)' already exists",
err.decode('utf-8'))

folder = None
start_sha = None

if exists:
folder = exists.group(1)
logger.info("updating existing dependency {}".format(folder))
full_path = os.path.join(self.project['modules-path'], folder)
proc = subprocess.Popen(
['git', 'fetch', '--all'],
cwd=full_path,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = proc.communicate()
remote_branch = 'origin/master' if branch is None \
else 'origin/{}'.format(branch)
proc = subprocess.Popen(
['git', 'reset', '--hard', remote_branch],
cwd=full_path,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = proc.communicate()
if branch is not None:
self.__checkout_branch(branch, full_path)
logger.info('Updating existing dependency {}.'.format(folder))
else:
# No "already exists" message: the folder name is taken from git's
# "Cloning into '<folder>'" line on stderr instead.
matches = re.match("Cloning into '(.+)'", err.decode('utf-8'))
folder = matches.group(1)
full_path = os.path.join(self.project['modules-path'], folder)
logger.info("pulled new dependency {}".format(folder))
if branch is not None:
self.__checkout_branch(branch, full_path)
logger.info('Pulling new dependency {}.'.format(folder))

# HEAD is read before and after the checkout so the log messages below can
# report whether the checkout moved to a different commit.
dependency_path = os.path.join(modules_path, folder)
start_sha = dbt.clients.git.get_current_sha(dependency_path)
dbt.clients.git.checkout(dependency_path, branch)
end_sha = dbt.clients.git.get_current_sha(dependency_path)

# Only the first six characters of each SHA are shown in the log output.
if exists:
if start_sha == end_sha:
logger.info(' Already at {}, nothing to do.'.format(
start_sha[:6]))
else:
logger.info(' Updated checkout from {} to {}.'.format(
start_sha[:6], end_sha[:6]))
else:
logger.info(' Checked out at {}.'.format(end_sha[:6]))

return folder

Expand Down

0 comments on commit 4f79c28

Please sign in to comment.