Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deps/outs: get rid of self.info #4502

Merged
merged 1 commit into from
Aug 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions dvc/cache/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,17 +620,16 @@ def merge(self, ancestor_info, our_info, their_info):
assert their_info

if ancestor_info:
ancestor_hash = ancestor_info[self.tree.PARAM_CHECKSUM]
ancestor_hash = ancestor_info.value
ancestor = self.get_dir_cache(ancestor_hash)
else:
ancestor = []

our_hash = our_info[self.tree.PARAM_CHECKSUM]
our_hash = our_info.value
our = self.get_dir_cache(our_hash)

their_hash = their_info[self.tree.PARAM_CHECKSUM]
their_hash = their_info.value
their = self.get_dir_cache(their_hash)

merged = self._merge_dirs(ancestor, our, their)
hash_info = self.tree.save_dir_info(merged)
return {hash_info.name: hash_info.value}
return self.tree.save_dir_info(merged)
45 changes: 24 additions & 21 deletions dvc/dependency/param.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from dvc.dependency.local import LocalDependency
from dvc.exceptions import DvcException
from dvc.hash_info import HashInfo
from dvc.utils.serialize import LOADERS, ParseError


Expand All @@ -31,7 +32,7 @@ def __init__(self, stage, path, params):
else:
assert isinstance(params, dict)
self.params = list(params.keys())
info = params
info = {self.PARAM_PARAMS: params}

super().__init__(
stage,
Expand All @@ -40,46 +41,48 @@ def __init__(self, stage, path, params):
info=info,
)

def dumpd(self):
ret = super().dumpd()
if not self.hash_info:
ret[self.PARAM_PARAMS] = self.params
return ret

def fill_values(self, values=None):
"""Load params values dynamically."""
if not values:
return
info = {}
for param in self.params:
if param in values:
self.info[param] = values[param]
info[param] = values[param]
self.hash_info = HashInfo(self.PARAM_PARAMS, info)

def save(self):
super().save()
self.info = self.save_info()
def workspace_status(self):
status = super().workspace_status()

def status(self):
status = super().status()

if status[str(self)] == "deleted":
if status.get(str(self)) == "deleted":
return status

status = defaultdict(dict)
info = self.read_params()
info = self.hash_info.value if self.hash_info else {}
actual = self.read_params()
for param in self.params:
if param not in info.keys():
if param not in actual.keys():
st = "deleted"
elif param not in self.info:
elif param not in info:
st = "new"
elif info[param] != self.info[param]:
elif actual[param] != info[param]:
st = "modified"
else:
assert info[param] == self.info[param]
assert actual[param] == info[param]
continue

status[str(self)][param] = st

return status

def dumpd(self):
return {
self.PARAM_PATH: self.def_path,
self.PARAM_PARAMS: self.info or self.params,
}
def status(self):
return self.workspace_status()

def read_params(self):
if not self.exists:
Expand All @@ -102,7 +105,7 @@ def read_params(self):
pass
return ret

def save_info(self):
def get_hash(self):
info = self.read_params()

missing_params = set(self.params) - set(info.keys())
Expand All @@ -113,4 +116,4 @@ def save_info(self):
)
)

return info
return HashInfo(self.PARAM_PARAMS, info)
14 changes: 6 additions & 8 deletions dvc/dependency/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@ def _make_repo(self, *, locked=True):
rev = (d.get("rev_lock") if locked else None) or d.get("rev")
return external_repo(d["url"], rev=rev)

def _get_checksum(self, locked=True):
def _get_hash(self, locked=True):
from dvc.tree.repo import RepoTree

with self._make_repo(locked=locked) as repo:
try:
return repo.find_out_by_relpath(self.def_path).info["md5"]
return repo.find_out_by_relpath(self.def_path).hash_info
except OutputNotFoundError:
path = PathInfo(os.path.join(repo.root_dir, self.def_path))

Expand All @@ -64,16 +64,14 @@ def _get_checksum(self, locked=True):

# We are polluting our repo cache with some dir listing here
if tree.isdir(path):
return self.repo.cache.local.tree.get_hash(
path, tree=tree
).value
return self.repo.cache.local.tree.get_hash(path, tree=tree)
return tree.get_file_hash(path)

def workspace_status(self):
current_checksum = self._get_checksum(locked=True)
updated_checksum = self._get_checksum(locked=False)
current = self._get_hash(locked=True)
updated = self._get_hash(locked=False)

if current_checksum != updated_checksum:
if current != updated:
return {str(self): "update available"}

return {}
Expand Down
10 changes: 5 additions & 5 deletions dvc/external_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,23 +138,23 @@ def fetch_external(self, paths: Iterable, **kwargs):
def download_update(result):
download_results.append(result)

save_infos = []
hash_infos = []
for path in paths:
if not self.repo_tree.exists(path):
raise PathMissingError(path, self.url)
hash_info = self.repo_tree.save_info(
hash_info = self.repo_tree.get_hash(
path, download_callback=download_update
)
).to_dict()
self.local_cache.save(
path,
self.repo_tree,
hash_info,
save_link=False,
download_callback=download_update,
)
save_infos.append(hash_info)
hash_infos.append(hash_info)

return sum(download_results), failed, save_infos
return sum(download_results), failed, hash_infos

def get_external(self, path, dest):
"""Convenience wrapper for fetch_external and checkout."""
Expand Down
10 changes: 10 additions & 0 deletions dvc/hash_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,13 @@ class HashInfo:

def __bool__(self):
return bool(self.value)

@classmethod
def from_dict(cls, d):
if not d:
return cls(None, None)
((name, value),) = d.items()
return cls(name, value)

def to_dict(self):
return {self.name: self.value} if self else {}
33 changes: 19 additions & 14 deletions dvc/output/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
MergeError,
RemoteCacheRequiredError,
)
from dvc.hash_info import HashInfo

from ..tree.base import BaseTree

Expand Down Expand Up @@ -113,7 +114,7 @@ def __init__(
self.stage = stage
self.repo = stage.repo if stage else None
self.def_path = path
self.info = info
self.hash_info = HashInfo.from_dict(info)
if tree:
self.tree = tree
else:
Expand Down Expand Up @@ -174,10 +175,13 @@ def cache_path(self):

@property
def checksum(self):
return self.info.get(self.tree.PARAM_CHECKSUM)
return self.hash_info.value

def get_checksum(self):
return self.tree.get_hash(self.path_info).value
return self.get_hash().value

def get_hash(self):
return self.tree.get_hash(self.path_info)

@property
def is_dir_checksum(self):
Expand All @@ -187,9 +191,6 @@ def is_dir_checksum(self):
def exists(self):
return self.tree.exists(self.path_info)

def save_info(self):
return self.tree.save_info(self.path_info)

def changed_checksum(self):
return self.checksum != self.get_checksum()

Expand Down Expand Up @@ -267,7 +268,7 @@ def save(self):
self.verify_metric()

if not self.use_cache:
self.info = self.save_info()
self.hash_info = self.get_hash()
if not self.IS_DEPENDENCY:
logger.debug(
"Output '%s' doesn't use cache. Skipping saving.", self
Expand All @@ -280,15 +281,17 @@ def save(self):
logger.debug("Output '%s' didn't change. Skipping saving.", self)
return

self.info = self.save_info()
self.hash_info = self.get_hash()

def commit(self):
assert self.info
assert self.hash_info
if self.use_cache:
self.cache.save(self.path_info, self.cache.tree, self.info)
self.cache.save(
self.path_info, self.cache.tree, self.hash_info.to_dict()
)

def dumpd(self):
ret = copy(self.info)
ret = copy(self.hash_info.to_dict())
ret[self.PARAM_PATH] = self.def_path

if self.IS_DEPENDENCY:
Expand Down Expand Up @@ -337,7 +340,7 @@ def checkout(

return self.cache.checkout(
self.path_info,
self.info,
self.hash_info.to_dict(),
force=force,
progress_callback=progress_callback,
relink=relink,
Expand Down Expand Up @@ -535,11 +538,13 @@ def merge(self, ancestor, other):

if ancestor:
self._check_can_merge(ancestor)
ancestor_info = ancestor.info
ancestor_info = ancestor.hash_info
else:
ancestor_info = None

self._check_can_merge(self)
self._check_can_merge(other)

self.info = self.cache.merge(ancestor_info, self.info, other.info)
self.hash_info = self.cache.merge(
ancestor_info, self.hash_info, other.hash_info
)
8 changes: 5 additions & 3 deletions dvc/stage/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from funcy import get_in, lcat, project

from dvc import dependency, output
from dvc.hash_info import HashInfo

from . import PipelineStage, Stage, loads_from
from .exceptions import StageNameUnspecified, StageNotFound
Expand Down Expand Up @@ -42,9 +43,10 @@ def fill_from_lock(stage, lock_data=None):
if isinstance(item, dependency.ParamsDependency):
item.fill_values(get_in(lock_data, [stage.PARAM_PARAMS, path]))
continue
item.info = get_in(checksums, [key, path], {})
item.info = item.info.copy()
item.info.pop("path", None)
info = get_in(checksums, [key, path], {})
info = info.copy()
info.pop("path", None)
item.hash_info = HashInfo.from_dict(info)

@classmethod
def load_stage(cls, dvcfile, name, stage_data, lock_data=None):
Expand Down
7 changes: 6 additions & 1 deletion dvc/stage/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,12 @@ def to_single_stage_lockfile(stage: "Stage") -> dict:
params, deps = split_params_deps(stage)
deps, outs = [
[
OrderedDict([(PARAM_PATH, item.def_path), *item.info.items()])
OrderedDict(
[
(PARAM_PATH, item.def_path),
*item.hash_info.to_dict().items(),
]
)
for item in sort_by_path(items)
]
for items in [deps, stage.outs]
Expand Down
4 changes: 0 additions & 4 deletions dvc/tree/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,10 +294,6 @@ def path_to_hash(self, path):

return "".join(parts)

def save_info(self, path_info, **kwargs):
hash_info = self.get_hash(path_info, **kwargs)
return {hash_info.name: hash_info.value}

def _calculate_hashes(self, file_infos):
file_infos = list(file_infos)
with Tqdm(
Expand Down
5 changes: 3 additions & 2 deletions tests/func/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
OverlappingOutputPathsError,
RecursiveAddingWhileUsingFilename,
)
from dvc.hash_info import HashInfo
from dvc.main import main
from dvc.output.base import OutputAlreadyTrackedError, OutputIsStageFileError
from dvc.repo import Repo as DvcRepo
Expand All @@ -42,7 +43,7 @@ def test_add(tmp_dir, dvc):
assert len(stage.outs) == 1
assert len(stage.deps) == 0
assert stage.cmd is None
assert stage.outs[0].info["md5"] == md5
assert stage.outs[0].hash_info == HashInfo("md5", md5)
assert stage.md5 is None

assert load_yaml("foo.dvc") == {
Expand Down Expand Up @@ -71,7 +72,7 @@ def test_add_directory(tmp_dir, dvc):
assert len(stage.deps) == 0
assert len(stage.outs) == 1

md5 = stage.outs[0].info["md5"]
md5 = stage.outs[0].hash_info.value

dir_info = dvc.cache.local.load_dir_cache(md5)
for info in dir_info:
Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_run_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def test_uncached_outs_are_cached(tmp_dir, dvc, run_copy):
name="copy-foo-bar",
)
with dvc.state:
stage.outs[0].info = stage.outs[0].save_info()
stage.outs[0].hash_info = stage.outs[0].get_hash()
assert os.path.exists(relpath(stage.outs[0].cache_path))


Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, local_cloud):
cache = dvc.cache.local
with cache.tree.state:
path_info = PathInfo(erepo_dir / "dir")
hash_info = cache.tree.save_info(path_info)
hash_info = cache.tree.get_hash(path_info).to_dict()
cache.save(path_info, tree, hash_info)

for hash_ in expected:
Expand Down
Loading