From b355a9662cecd8618e1f739df3d3eed7e1da233a Mon Sep 17 00:00:00 2001
From: Peter Rowlands
Date: Tue, 13 Jun 2023 19:24:39 +0900
Subject: [PATCH 1/2] dvc-data: bump to 1.11.0

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index bfcf736a70..1cc794b655 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,7 +34,7 @@ dependencies = [
     "configobj>=5.0.6",
     "distro>=1.3",
     "dpath<3,>=2.1.0",
-    "dvc-data>=1.10.0,<1.11.0",
+    "dvc-data>=1.11.0,<1.12.0",
     "dvc-http>=2.29.0",
     "dvc-render>=0.3.1,<1",
     "dvc-studio-client>=0.9.2,<1",

From 536b6a85b87a71c034e00b0c28d05130664f8b5c Mon Sep 17 00:00:00 2001
From: Peter Rowlands
Date: Tue, 13 Jun 2023 18:27:35 +0900
Subject: [PATCH 2/2] cache migrate: add utility for migrating local cache files to 3.x

---
 dvc/cachemgr.py       | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 dvc/commands/cache.py | 33 +++++++++++++++++++++++++++++++--
 2 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/dvc/cachemgr.py b/dvc/cachemgr.py
index a78ff0ed25..b8df7b1f9b 100644
--- a/dvc/cachemgr.py
+++ b/dvc/cachemgr.py
@@ -1,10 +1,13 @@
 import os
-from typing import Optional, Tuple
+from typing import TYPE_CHECKING, Optional, Tuple
 
 from dvc.fs import GitFileSystem, Schemes
 from dvc_data.hashfile.db import get_odb
 from dvc_data.hashfile.hash import DEFAULT_ALGORITHM
 
+if TYPE_CHECKING:
+    from dvc.repo import Repo
+
 LEGACY_HASH_NAMES = {"md5-dos2unix", "params"}
 
 
@@ -99,3 +102,43 @@ def local_cache_dir(self) -> str:
         (i.e. `dvc cache dir`).
         """
         return self.legacy.path
+
+
+def migrate_2_to_3(repo: "Repo", dry: bool = False) -> None:
+    """Migrate legacy 2.x objects to 3.x cache.
+
+    Legacy 'md5-dos2unix' objects will be re-hashed with 'md5', added to 3.x cache,
+    and then a link from the legacy 2.x location to the 3.x location will be created.
+
+    Args:
+        repo: Repo whose `cache.legacy` is used as the migration source and
+            whose `cache.local` is used as the destination.
+        dry: If True, only report how many legacy objects would be migrated
+            and return without re-hashing or migrating anything.
+    """
+    from dvc.fs.callbacks import TqdmCallback
+    from dvc.ui import ui
+    from dvc_data.hashfile.db.migrate import migrate, prepare
+
+    src = repo.cache.legacy
+    dest = repo.cache.local
+    if dry:
+        oids = list(src._list_oids())  # pylint: disable=protected-access
+        ui.write(
+            f"{len(oids)} files will be re-hashed and migrated to the DVC 3.0 cache "
+            "location."
+        )
+        return
+
+    with TqdmCallback(
+        desc="Computing DVC 3.0 hashes",
+        unit="files",
+    ) as cb:
+        migration = prepare(src, dest, callback=cb)
+
+    with TqdmCallback(
+        desc="Migrating to DVC 3.0 cache",
+        unit="files",
+    ) as cb:
+        count = migrate(migration, callback=cb)
+    ui.write(f"Migrated {count} files to DVC 3.0 cache location.")
diff --git a/dvc/commands/cache.py b/dvc/commands/cache.py
index e21b2081dc..3e151d9090 100644
--- a/dvc/commands/cache.py
+++ b/dvc/commands/cache.py
@@ -2,6 +2,7 @@
 import os
 
 from dvc.cli import completion
+from dvc.cli.command import CmdBase
 from dvc.cli.utils import append_doc_link, fix_subparsers
 from dvc.commands.config import CmdConfig
 from dvc.ui import ui
@@ -36,6 +37,16 @@ def run(self):
         return 0
 
 
+class CmdCacheMigrate(CmdBase):
+    """Run `dvc cache migrate`, honoring the `--dry` flag."""
+
+    def run(self):
+        from dvc.cachemgr import migrate_2_to_3
+
+        migrate_2_to_3(self.repo, dry=self.args.dry)
+        return 0
+
+
 def add_parser(subparsers, parent_parser):
     from dvc.commands.config import parent_config_parser
 
@@ -54,8 +65,6 @@ def add_parser(subparsers, parent_parser):
         help="Use `dvc cache CMD --help` for command-specific help.",
     )
 
-    fix_subparsers(cache_subparsers)
-
     parent_cache_config_parser = argparse.ArgumentParser(
         add_help=False, parents=[parent_config_parser]
     )
@@ -86,3 +95,23 @@ def add_parser(subparsers, parent_parser):
         nargs="?",
     ).complete = completion.DIR
     cache_dir_parser.set_defaults(func=CmdCacheDir)
+
+    CACHE_MIGRATE_HELP = "Migrate cached files to the DVC 3.0 cache location."
+    cache_migrate_parser = cache_subparsers.add_parser(
+        "migrate",
+        parents=[parent_parser],
+        description=append_doc_link(CACHE_MIGRATE_HELP, "cache/migrate"),
+        help=CACHE_MIGRATE_HELP,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    cache_migrate_parser.add_argument(
+        "--dry",
+        help=(
+            "Only print actions which would be taken without actually migrating "
+            "any data."
+        ),
+        action="store_true",
+    )
+    cache_migrate_parser.set_defaults(func=CmdCacheMigrate)
+
+    fix_subparsers(cache_subparsers)