Improve existing --no-checkout flag #926

Merged 2 commits on Oct 25, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -6,6 +6,7 @@ _When adding new entries to the changelog, please include issue/PR numbers where

## 0.14.3 (UNRELEASED)

- Adds support for disabling the working-copy checkout of specific datasets using the commands `kart import DATASET --no-checkout` or `kart checkout --not-dataset=DATASET`, and re-enabling it using `kart checkout --dataset=DATASET`. [#926](https://github.com/koordinates/kart/pull/926)
- Adds information on referencing and citing Kart to `CITATION`. [#914](https://github.com/koordinates/kart/pull/914)
- Fixes a bug where Kart would misidentify a non-Kart repo as a Kart V1 repo in some circumstances. [#918](https://github.com/koordinates/kart/issues/918)

6 changes: 5 additions & 1 deletion kart/byod/point_cloud_import.py
@@ -25,7 +25,11 @@
"do_checkout",
is_flag=True,
default=True,
help="Whether to create a working copy once the import is finished, if no working copy exists yet.",
help=(
"Whether to check out the dataset once the import is finished. If false, the dataset will be configured as "
"not being checked out and will never be written to the working copy, until this decision is reversed by "
"running `kart checkout --dataset=DATASET-PATH`."
),
)
@click.option(
"--replace-existing",
6 changes: 5 additions & 1 deletion kart/byod/raster_import.py
@@ -25,7 +25,11 @@
"do_checkout",
is_flag=True,
default=True,
help="Whether to create a working copy once the import is finished, if no working copy exists yet.",
help=(
"Whether to check out the dataset once the import is finished. If false, the dataset will be configured as "
"not being checked out and will never be written to the working copy, until this decision is reversed by "
"running `kart checkout --dataset=DATASET-PATH`."
),
)
@click.option(
"--replace-existing",
93 changes: 91 additions & 2 deletions kart/checkout.py
@@ -50,6 +50,18 @@
type=SpatialFilterString(encoding="utf-8"),
help=spatial_filter_help_text(),
)
@click.option(
"--dataset",
"do_checkout_spec",
multiple=True,
help="Request that a particular dataset be checked out (one which is currently configured to not be checked out)",
)
@click.option(
"--not-dataset",
"non_checkout_spec",
multiple=True,
help="Request that a particular dataset *not* be checked out (one which is currently configured to be checked out)",
)
@click.argument("refish", default=None, required=False, shell_complete=ref_completer)
def checkout(
ctx,
@@ -58,6 +70,8 @@ def checkout(
discard_changes,
do_guess,
spatial_filter_spec,
do_checkout_spec,
non_checkout_spec,
refish,
):
"""Switch branches or restore working tree files"""
@@ -126,8 +140,38 @@
"The spatial filter has been updated in the config and no longer matches the working copy."
)

non_checkout_datasets = repo.non_checkout_datasets
if do_checkout_spec or non_checkout_spec:
do_checkout_spec = set(do_checkout_spec)
non_checkout_spec = set(non_checkout_spec)
_verify_checkout_datasets_spec(
repo,
commit,
refish,
do_checkout_spec,
non_checkout_spec,
non_checkout_datasets,
)
non_checkout_datasets = (
non_checkout_datasets | non_checkout_spec
) - do_checkout_spec

do_switch_checkout_datasets = not repo.working_copy.matches_non_checkout_datasets(
non_checkout_datasets
)

# Again, we also allow switching of set of checked out / non-checked out datasets just by
# writing it directly to the config and then running `kart checkout`, but using
# `kart checkout --dataset=foo --not-dataset=bar` is preferred.
if do_switch_checkout_datasets and not (do_checkout_spec or non_checkout_spec):
click.echo(
"The set of datasets to be checked out has been updated in the config and no longer matches the working copy."
)

discard_changes = discard_changes or force
if (do_switch_commit or do_switch_spatial_filter) and not discard_changes:
if (
do_switch_commit or do_switch_spatial_filter or do_switch_checkout_datasets
) and not discard_changes:
ctx.obj.check_not_dirty(help_message=_DISCARD_CHANGES_HELP_MESSAGE)

if new_branch and new_branch in repo.branches:
@@ -170,17 +214,37 @@ def checkout(
if spatial_filter_spec is not None:
spatial_filter_spec.write_config(repo, update_remote=promisor_remote)

if do_checkout_spec or non_checkout_spec:
repo.configure_do_checkout_datasets(do_checkout_spec, True)
repo.configure_do_checkout_datasets(non_checkout_spec, False)

TableWorkingCopy.ensure_config_exists(repo)
repo.set_head(head_ref)

repo_key_filter = (
RepoKeyFilter.exclude_datasets(non_checkout_datasets)
if non_checkout_datasets
else RepoKeyFilter.MATCH_ALL
)
parts_to_create = (
repo.datasets().working_copy_part_types() if not repo.head_is_unborn else ()
repo.datasets(repo_key_filter=repo_key_filter).working_copy_part_types()
if not repo.head_is_unborn
else ()
)

if do_switch_commit or do_switch_spatial_filter or discard_changes:
# Changing commit, changing spatial filter, or discarding changes mean we need to update every dataset:
repo.working_copy.reset_to_head(
rewrite_full=do_switch_spatial_filter,
create_parts_if_missing=parts_to_create,
non_checkout_datasets=non_checkout_datasets,
)
elif do_switch_checkout_datasets:
# Not doing any of the above - just need to change those datasets newly added / removed from the non_checkout_list.
repo.working_copy.reset_to_head(
non_checkout_datasets=non_checkout_datasets,
only_update_checkout_datasets=True,
create_parts_if_missing=parts_to_create,
)
elif parts_to_create:
# Possibly we needn't auto-create any working copy here at all, but lots of tests currently depend on it.
@@ -189,6 +253,31 @@
)


def _verify_checkout_datasets_spec(
repo, commit, refish, do_checkout_spec, non_checkout_spec, non_checkout_datasets
):
# Check the set of datasets that the user wants to check out / not check out, to make sure we've heard of them.
# (avoid the bad experience where the user disables check out of non-existing dataset "foo-bar" instead of "foo_bar").
if do_checkout_spec & non_checkout_spec:
bad_ds = next(iter(do_checkout_spec & non_checkout_spec))
raise click.BadParameter(
f"Dataset {bad_ds} should not be present in both --dataset and --not-dataset",
param_hint="dataset",
)
# Only datasets that are not already in the config are checked - if the user managed to mark it as non-checkout before,
# they can mark it as checkout now, even if we can't find it any more.
new_spec = (do_checkout_spec | non_checkout_spec) - non_checkout_datasets
if not new_spec:
return
datasets_at_commit = repo.datasets(commit)
for ds_path in new_spec:
if ds_path not in datasets_at_commit:
raise click.BadParameter(
f"No dataset {ds_path} at commit {refish or 'HEAD'}",
param_hint="dataset" if ds_path in do_checkout_spec else "not-dataset",
)


@functools.lru_cache()
def _git_fetch_supports_flag(repo, flag):
r = subprocess.run(
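The heart of the new `--dataset` / `--not-dataset` handling in `checkout.py` is plain set arithmetic: paths passed with `--not-dataset` are added to the persisted non-checkout set, paths passed with `--dataset` are removed from it, and a path given with both flags is rejected. A minimal standalone sketch of that behaviour (names are illustrative; `current_non_checkout` stands in for `repo.non_checkout_datasets`, and the real code raises `click.BadParameter` rather than `ValueError`):

```python
def merge_checkout_spec(current_non_checkout, do_checkout_spec, non_checkout_spec):
    """Sketch of the set arithmetic behind `kart checkout --dataset / --not-dataset`."""
    do_checkout_spec = set(do_checkout_spec)
    non_checkout_spec = set(non_checkout_spec)
    overlap = do_checkout_spec & non_checkout_spec
    if overlap:
        raise ValueError(
            f"Dataset {next(iter(overlap))} should not be present in both --dataset and --not-dataset"
        )
    # Newly excluded datasets are added; re-enabled datasets are removed.
    return (set(current_non_checkout) | non_checkout_spec) - do_checkout_spec

# "a" and "b" start excluded; "a" is re-enabled, "c" is newly excluded:
assert merge_checkout_spec({"a", "b"}, {"a"}, {"c"}) == {"b", "c"}
```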
6 changes: 5 additions & 1 deletion kart/import_.py
@@ -51,7 +51,11 @@ def list_import_formats(ctx):
"do_checkout",
is_flag=True,
default=True,
help="Whether to create a working copy once the import is finished, if no working copy exists yet.",
help=(
"Whether to check out the dataset once the import is finished. If false, the dataset will be configured as "
"not being checked out and will never be written to the working copy, until this decision is reversed by "
"running `kart checkout --dataset=DATASET-PATH`."
),
)
@click.option(
"--dataset-path", "--dataset", "ds_path", help="The dataset's path once imported"
6 changes: 5 additions & 1 deletion kart/point_cloud/import_.py
@@ -41,7 +41,11 @@
"do_checkout",
is_flag=True,
default=True,
help="Whether to create a working copy once the import is finished, if no working copy exists yet.",
help=(
"Whether to check out the dataset once the import is finished. If false, the dataset will be configured as "
"not being checked out and will never be written to the working copy, until this decision is reversed by "
"running `kart checkout --dataset=DATASET-PATH`."
),
)
@click.option(
"--replace-existing",
6 changes: 5 additions & 1 deletion kart/raster/import_.py
@@ -41,7 +41,11 @@
"do_checkout",
is_flag=True,
default=True,
help="Whether to create a working copy once the import is finished, if no working copy exists yet.",
help=(
"Whether to check out the dataset once the import is finished. If false, the dataset will be configured as "
"not being checked out and will never be written to the working copy, until this decision is reversed by "
"running `kart checkout --dataset=DATASET-PATH`."
),
)
@click.option(
"--replace-existing",
29 changes: 29 additions & 0 deletions kart/repo.py
@@ -577,6 +577,35 @@ def spatial_filter(self):

return SpatialFilter.from_repo_config(self)

def configure_do_checkout_datasets(self, dataset_paths, do_checkout):
for dataset_path in dataset_paths:
key = f"dataset.{dataset_path}.checkout"
if do_checkout:
# Checking out a dataset is the default, we don't clutter the config with it.
self.del_config(key)
else:
# Specifically mark this dataset as do-not-checkout.
self.config[key] = False

@property
def non_checkout_datasets(self):
result = set()
config = self.config
for entry in config:
parts = entry.name.split(".", maxsplit=3)
if len(parts) > 3:
# Handle a name-containing-dots ie "dataset.NAME.CONTAINING.DOTS.checkout"
prefix, rest = entry.name.split(".", maxsplit=1)
parts = [prefix, *rest.rsplit(".", maxsplit=1)]
if (
len(parts) == 3
and parts[0] == "dataset"
and parts[2] == "checkout"
and not config.get_bool(entry.name)
):
result.add(parts[1])
return result

def get_config_str(self, key, default=None):
return self.config[key] if key in self.config else default

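Because a dataset path may itself contain dots, the new `non_checkout_datasets` property cannot simply split the config key on every dot. A small, self-contained sketch of the parsing step (assuming, as the diff does, that a relevant key always starts with `dataset` and ends with `checkout`):

```python
def parse_checkout_key(name):
    """Split a config entry name such as "dataset.my.nested.name.checkout" into
    ("dataset", <dataset-path>, "checkout"), or return None for unrelated keys."""
    parts = name.split(".", maxsplit=3)
    if len(parts) > 3:
        # The dataset path contains dots - only the first and last segments are fixed.
        prefix, rest = name.split(".", maxsplit=1)
        parts = [prefix, *rest.rsplit(".", maxsplit=1)]
    if len(parts) == 3 and parts[0] == "dataset" and parts[2] == "checkout":
        return tuple(parts)
    return None

assert parse_checkout_key("dataset.roads.checkout") == ("dataset", "roads", "checkout")
assert parse_checkout_key("dataset.my.nested.name.checkout") == ("dataset", "my.nested.name", "checkout")
assert parse_checkout_key("core.bare") is None
```

In the property itself, any such key whose boolean value is false adds the middle segment (the dataset path) to the result set.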
3 changes: 3 additions & 0 deletions kart/structure.py
@@ -468,6 +468,9 @@ def __init__(
self.filter_dataset_type = filter_dataset_type
self.force_dataset_class = force_dataset_class

def __contains__(self, ds_path):
return self.get(ds_path) is not None

def __getitem__(self, ds_path):
"""Get a specific dataset by path."""
result = self.get(ds_path)
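The one-line `__contains__` is what lets `_verify_checkout_datasets_spec` above test `ds_path not in datasets_at_commit` instead of catching the `KeyError` raised by `__getitem__`. A minimal, self-contained sketch of the pattern (the class and data are hypothetical stand-ins for Kart's real datasets structure):

```python
class Datasets:
    """Toy stand-in: `get` returns None for unknown paths, `[]` raises, `in` is quiet."""

    def __init__(self, by_path):
        self._by_path = by_path

    def get(self, ds_path):
        return self._by_path.get(ds_path)

    def __getitem__(self, ds_path):
        result = self.get(ds_path)
        if result is None:
            raise KeyError(f"No dataset {ds_path}")
        return result

    def __contains__(self, ds_path):
        return self.get(ds_path) is not None

datasets = Datasets({"roads": object()})
assert "roads" in datasets
assert "rivers" not in datasets
```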
7 changes: 6 additions & 1 deletion kart/tabular/import_.py
@@ -154,7 +154,11 @@ def any_at_all(iterable):
"do_checkout",
is_flag=True,
default=True,
help="Whether to create a working copy once the import is finished, if no working copy exists yet.",
help=(
"Whether to check out the dataset once the import is finished. If false, the dataset will be configured as "
"not being checked out and will never be written to the working copy, until this decision is reversed by "
"running `kart checkout --dataset=DATASET-PATH`."
),
)
@click.option(
"--num-workers",
@@ -346,6 +350,7 @@ def table_import(

# During imports we can keep old changes since they won't conflict with newly imported datasets.
parts_to_create = [PartType.TABULAR] if do_checkout else []
repo.configure_do_checkout_datasets(new_ds_paths, do_checkout)
repo.working_copy.reset_to_head(
repo_key_filter=RepoKeyFilter.datasets(new_ds_paths),
create_parts_if_missing=parts_to_create,
6 changes: 4 additions & 2 deletions kart/tabular/working_copy/base.py
@@ -500,7 +500,9 @@ def diff_dataset_to_working_copy(
return DatasetDiff()

feature_filter = ds_filter.get("feature", ds_filter.child_type())
with self.session():
with self.session() as sess:
if self._is_noncheckout_dataset(sess, dataset):
return DatasetDiff()
meta_diff = self.diff_dataset_to_working_copy_meta(dataset, raise_if_dirty)
feature_diff = self.diff_dataset_to_working_copy_feature(
dataset, feature_filter, meta_diff, raise_if_dirty
@@ -1181,12 +1183,12 @@ def _delete_meta(self, sess, dataset):

def _do_reset_datasets(
self,
*,
base_datasets,
target_datasets,
ds_inserts,
ds_updates,
ds_deletes,
*,
base_tree=None,
target_tree=None,
target_commit=None,
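In both `_do_reset_datasets` signatures the bare `*` marker moves below the dataset arguments, so those arguments may now be passed positionally while the tree/commit arguments stay keyword-only. A tiny illustration of what the marker's position changes (function names here are made up for the example):

```python
def before(*, base_datasets, target_datasets):
    pass  # every argument must be passed by keyword

def after(base_datasets, target_datasets, *, base_tree=None):
    pass  # dataset arguments may be positional; base_tree stays keyword-only

after(1, 2)                   # OK
after(1, 2, base_tree=None)   # OK
# before(1, 2)                -> TypeError: before() takes 0 positional arguments
# after(1, 2, None)           -> TypeError: takes 2 positional arguments but 3 were given
```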
1 change: 1 addition & 0 deletions kart/tile/importer.py
@@ -333,6 +333,7 @@ def import_tiles(self):
self.repo.references[fast_import_on_branch].delete()

parts_to_create = [PartType.WORKDIR] if self.do_checkout else []
self.repo.configure_do_checkout_datasets([self.dataset_path], self.do_checkout)
# During imports we can keep old changes since they won't conflict with newly imported datasets.
self.repo.working_copy.reset_to_head(
repo_key_filter=RepoKeyFilter.datasets([self.dataset_path]),
6 changes: 5 additions & 1 deletion kart/tile/tile_dataset.py
@@ -395,6 +395,11 @@ def diff_to_working_copy(
The resulting diffs are missing almost all of the info about the new tiles,
but this is faster and more reliable if this information is not needed.
"""
workdir = self.repo.working_copy.workdir
with workdir.state_session() as sess:
if workdir._is_noncheckout_dataset(sess, self.path):
return DatasetDiff()

tile_filter = ds_filter.get("tile", ds_filter.child_type())

current_metadata = self.tile_metadata
@@ -684,7 +689,6 @@ def apply_tile_diff(
"""
with object_builder.chdir(self.inner_path):
for delta in tile_diff.values():

if delta.type in ("insert", "update"):
new_val = delta.new_value
name = new_val.get("name")
2 changes: 1 addition & 1 deletion kart/workdir.py
@@ -290,12 +290,12 @@ def _is_head(self, commit_or_tree):

def _do_reset_datasets(
self,
*,
base_datasets,
target_datasets,
ds_inserts,
ds_updates,
ds_deletes,
*,
base_tree=None,
target_tree=None,
target_commit=None,