From 9547044c8fa8cbef53d1e389d220becd25cd2b07 Mon Sep 17 00:00:00 2001 From: Andrew Olsen Date: Tue, 1 Aug 2023 10:28:02 +1200 Subject: [PATCH 1/2] Some kart-data-ls improvements Backwards compatible at this point: - You need to add a flag to change kart-data-ls behaviour - kart-data-version is now deprecated, but still works. See https://github.com/koordinates/kart/issues/892 --- CHANGELOG.md | 1 + kart/data.py | 56 ++++++++++++++++++++++++++++++++++------------ tests/test_data.py | 32 ++++++++++++++++++++------ 3 files changed, 68 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bd4f6cd6..5d437ffa2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ _When adding new entries to the changelog, please include issue/PR numbers where - Fixes a bug where Git subprocesses (such as git clone) don't prompt the user for credentials or to resolve SSH issues on Windows. [#852](https://github.com/koordinates/kart/issues/852) - Better protection against XSS in the HTML diff viewer. [#884](https://github.com/koordinates/kart/pull/884) - Speed-up: greatly reduces the time taken to update the index file after checking out LFS tiles to the working copy. [#880](https://github.com/koordinates/kart/pull/880) +- Adds `--with-dataset-types` option to `kart data ls` command (and deprecates `kart data version`). [#892](https://github.com/koordinates/kart/issues/892) ## 0.14.0 diff --git a/kart/data.py b/kart/data.py index dffb222d3..686033360 100644 --- a/kart/data.py +++ b/kart/data.py @@ -28,27 +28,44 @@ def data(ctx, **kwargs): type=click.Choice(["text", "json"]), default="text", ) +@click.option( + "--with-dataset-types/--without-dataset-types", + is_flag=True, + help="When set, outputs the dataset type and version. (This may become the default in a later version of Kart)", +) @click.argument("refish", required=False, default="HEAD", shell_complete=ref_completer) @click.pass_context -def data_ls(ctx, output_format, refish): +def data_ls(ctx, output_format, with_dataset_types, refish): """List all of the datasets in the Kart repository""" repo = ctx.obj.get_repo(allowed_states=KartRepoState.ALL_STATES) - ds_paths = list(repo.datasets(refish).paths()) + json_list = [ + {"path": ds.path, "type": ds.DATASET_TYPE, "version": ds.VERSION} + for ds in repo.datasets(refish) + ] + + if output_format == "text" and not json_list: + repo_desc = ( + "Empty repository." + if repo.head_is_unborn + else "The commit at HEAD has no datasets." + ) + click.echo(f'{repo_desc}\n (use "kart import" to add some data)') + return if output_format == "text": - if ds_paths: - for ds_path in ds_paths: - click.echo(ds_path) + if with_dataset_types: + for ds_obj in json_list: + click.echo(f"{ds_obj['path']}\t({ds_obj['type']}.v{ds_obj['version']})") + else: - repo_desc = ( - "Empty repository." - if repo.head_is_unborn - else "The commit at HEAD has no datasets." - ) - click.echo(f'{repo_desc}\n (use "kart import" to add some data)') + for ds_obj in json_list: + click.echo(ds_obj["path"]) elif output_format == "json": - dump_json_output({"kart.data.ls/v1": ds_paths}, sys.stdout) + if not with_dataset_types: + json_list = [ds_obj["path"] for ds_obj in json_list] + version_marker = "v2" if with_dataset_types else "v1" + dump_json_output({f"kart.data.ls/{version_marker}": json_list}, sys.stdout) @data.command(name="rm") @@ -117,7 +134,7 @@ def data_rm(ctx, message, output_format, datasets): repo.gc("--auto") -@data.command(name="version") +@data.command(name="version", hidden=True) @click.option( "--output-format", "-o", @@ -126,7 +143,18 @@ def data_rm(ctx, message, output_format, datasets): ) @click.pass_context def data_version(ctx, output_format): - """Show the repository structure version""" + """ + Show the repository structure version. + + This was more useful when each Kart repositories contained a single type of table dataset at a single version - + eg table.v1 in one repository, vs table.v2 in another repository. + Now that Kart repositories can contain more than one dataset type eg table.v3, point-cloud.v1, raster.v1, + it no longer really conveys anything useful about the Kart repository's "version". + """ + click.echo( + "The command `kart data version` is deprecated - use `kart data ls --with-dataset-types` instead.", + err=True, + ) repo = ctx.obj.get_repo( allowed_states=KartRepoState.ALL_STATES, allow_unsupported_versions=True ) diff --git a/tests/test_data.py b/tests/test_data.py index 7b832a646..930e491c4 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -4,18 +4,36 @@ @pytest.mark.parametrize("output_format", ("text", "json")) -def test_data_ls(output_format, data_archive_readonly, cli_runner): +@pytest.mark.parametrize( + "extra_flag", ("--with-dataset-types", "--without-dataset-types") +) +def test_data_ls(output_format, extra_flag, data_archive_readonly, cli_runner): # All datasets now support getting metadata in either V1 or V2 format, # but if you don't specify a particular item, they will show all V2 items - # these are more self-explanatory to an end-user. with data_archive_readonly("points"): - r = cli_runner.invoke(["data", "ls", "-o", output_format]) + r = cli_runner.invoke(["data", "ls", "-o", output_format, extra_flag]) assert r.exit_code == 0, r - if output_format == "text": - assert r.stdout.splitlines() == ["nz_pa_points_topo_150k"] + if extra_flag == "--with-dataset-types": + if output_format == "text": + assert r.stdout.splitlines() == ["nz_pa_points_topo_150k\t(table.v3)"] + else: + output = json.loads(r.stdout) + assert output == { + "kart.data.ls/v2": [ + { + "path": "nz_pa_points_topo_150k", + "type": "table", + "version": 3, + } + ] + } else: - output = json.loads(r.stdout) - assert output == {"kart.data.ls/v1": ["nz_pa_points_topo_150k"]} + if output_format == "text": + assert r.stdout.splitlines() == ["nz_pa_points_topo_150k"] + else: + output = json.loads(r.stdout) + assert output == {"kart.data.ls/v1": ["nz_pa_points_topo_150k"]} @pytest.mark.parametrize("output_format", ("text", "json")) @@ -59,7 +77,7 @@ def test_data_rm(data_archive, cli_runner): r = cli_runner.invoke(["data", "ls"]) assert r.exit_code == 0, r.stderr assert r.stdout.splitlines() == [ - 'The commit at HEAD has no datasets.', + "The commit at HEAD has no datasets.", ' (use "kart import" to add some data)', ] From f46c9c5946c3f867c8edc683f771c5e00b66f0cc Mon Sep 17 00:00:00 2001 From: Andrew Olsen Date: Tue, 1 Aug 2023 10:57:38 +1200 Subject: [PATCH 2/2] kart-data-ls: address review comments --- kart/data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kart/data.py b/kart/data.py index 686033360..3b3bb05d5 100644 --- a/kart/data.py +++ b/kart/data.py @@ -47,7 +47,7 @@ def data_ls(ctx, output_format, with_dataset_types, refish): repo_desc = ( "Empty repository." if repo.head_is_unborn - else "The commit at HEAD has no datasets." + else f"The commit at {refish} has no datasets." ) click.echo(f'{repo_desc}\n (use "kart import" to add some data)') return @@ -55,7 +55,7 @@ def data_ls(ctx, output_format, with_dataset_types, refish): if output_format == "text": if with_dataset_types: for ds_obj in json_list: - click.echo(f"{ds_obj['path']}\t({ds_obj['type']}.v{ds_obj['version']})") + click.echo("{path}\t({type}.v{version})".format(**ds_obj)) else: for ds_obj in json_list: