From ffbd462025486bdfacffb340a77993d7a1cdabc5 Mon Sep 17 00:00:00 2001 From: Nico Wagner Date: Wed, 12 Jul 2023 21:19:32 +0200 Subject: [PATCH] Add `unique-strategy` config option (#658) --- CHANGELOG.md | 1 + pica-toolkit/src/commands/cat.rs | 35 +++++++++++++++---- .../Pica.toml | 2 ++ .../cat/019-cat-unique-strategy-config1.toml | 6 ++++ .../Pica.toml | 2 ++ .../cat/020-cat-unique-strategy-config2.toml | 6 ++++ .../Pica.toml | 2 ++ .../cat/021-cat-unique-strategy-config3.toml | 6 ++++ 8 files changed, 53 insertions(+), 7 deletions(-) create mode 100644 pica-toolkit/tests/snapshot/cat/019-cat-unique-strategy-config1.in/Pica.toml create mode 100644 pica-toolkit/tests/snapshot/cat/019-cat-unique-strategy-config1.toml create mode 100644 pica-toolkit/tests/snapshot/cat/020-cat-unique-strategy-config2.in/Pica.toml create mode 100644 pica-toolkit/tests/snapshot/cat/020-cat-unique-strategy-config2.toml create mode 100644 pica-toolkit/tests/snapshot/cat/021-cat-unique-strategy-config3.in/Pica.toml create mode 100644 pica-toolkit/tests/snapshot/cat/021-cat-unique-strategy-config3.toml diff --git a/CHANGELOG.md b/CHANGELOG.md index 3688bbdb0..c70e99b66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * #641 Stabilize `sample` command * #642 Add `--squash` and `--merge` option * #644 Add `!^` and `!$` operator +* #658 Add unique-strategy config option (`cat` command) ### Changed diff --git a/pica-toolkit/src/commands/cat.rs b/pica-toolkit/src/commands/cat.rs index 6fe85b53c..e0d028db9 100644 --- a/pica-toolkit/src/commands/cat.rs +++ b/pica-toolkit/src/commands/cat.rs @@ -18,14 +18,28 @@ pub(crate) struct CatConfig { /// Skip invalid records that can't be decoded. pub(crate) skip_invalid: Option, + /// Strategy to determine duplicate records. + pub(crate) unique_strategy: Option, + /// Compress output in gzip format pub(crate) gzip: Option, } -#[derive(Clone, Debug, PartialEq, Eq, Default, ValueEnum)] -enum Strategy { +#[derive( + Clone, + Debug, + PartialEq, + Eq, + Default, + ValueEnum, + Deserialize, + Serialize, +)] +pub(crate) enum Strategy { #[default] + #[serde(rename = "idn")] Idn, + #[serde(rename = "hash")] Hash, } @@ -52,12 +66,10 @@ pub(crate) struct Cat { #[arg( long, requires = "unique", - default_value = "idn", value_name = "strategy", - hide_possible_values = true, - hide_default_value = true + hide_possible_values = true )] - unique_strategy: Strategy, + unique_strategy: Option, /// Append to the given file, do not overwrite #[arg(long)] @@ -91,9 +103,18 @@ impl Cat { config.global ); + let unique_strategy = + if let Some(strategy) = self.unique_strategy { + strategy + } else if let Some(ref config) = config.cat { + config.unique_strategy.clone().unwrap_or_default() + } else { + Strategy::default() + }; + let mut seen = BTreeSet::new(); let key = |record: &ByteRecord| -> String { - match self.unique_strategy { + match unique_strategy { Strategy::Idn => record .idn() .map(ToString::to_string) diff --git a/pica-toolkit/tests/snapshot/cat/019-cat-unique-strategy-config1.in/Pica.toml b/pica-toolkit/tests/snapshot/cat/019-cat-unique-strategy-config1.in/Pica.toml new file mode 100644 index 000000000..4c1307c3f --- /dev/null +++ b/pica-toolkit/tests/snapshot/cat/019-cat-unique-strategy-config1.in/Pica.toml @@ -0,0 +1,2 @@ +[cat] +unique-strategy = "idn" diff --git a/pica-toolkit/tests/snapshot/cat/019-cat-unique-strategy-config1.toml b/pica-toolkit/tests/snapshot/cat/019-cat-unique-strategy-config1.toml new file mode 100644 index 000000000..959b425bf --- /dev/null +++ b/pica-toolkit/tests/snapshot/cat/019-cat-unique-strategy-config1.toml @@ -0,0 +1,6 @@ +bin.name = "pica" +args = "-c Pica.toml cat --unique" +status = "success" +stderr = "" +stdin = "003@ \u001f0123456789X\u001e012A \u001fa1\u001e\n003@ \u001f0123456789X\u001e012A \u001fa2\u001e\n" +stdout = "003@ \u001f0123456789X\u001e012A \u001fa1\u001e\n" diff --git a/pica-toolkit/tests/snapshot/cat/020-cat-unique-strategy-config2.in/Pica.toml b/pica-toolkit/tests/snapshot/cat/020-cat-unique-strategy-config2.in/Pica.toml new file mode 100644 index 000000000..47a055b8d --- /dev/null +++ b/pica-toolkit/tests/snapshot/cat/020-cat-unique-strategy-config2.in/Pica.toml @@ -0,0 +1,2 @@ +[cat] +unique-strategy = "hash" diff --git a/pica-toolkit/tests/snapshot/cat/020-cat-unique-strategy-config2.toml b/pica-toolkit/tests/snapshot/cat/020-cat-unique-strategy-config2.toml new file mode 100644 index 000000000..4c0d2bb08 --- /dev/null +++ b/pica-toolkit/tests/snapshot/cat/020-cat-unique-strategy-config2.toml @@ -0,0 +1,6 @@ +bin.name = "pica" +args = "-c Pica.toml cat --unique" +status = "success" +stderr = "" +stdin = "003@ \u001f0123456789X\u001e012A \u001fa1\u001e\n003@ \u001f0123456789X\u001e012A \u001fa2\u001e\n" +stdout = "003@ \u001f0123456789X\u001e012A \u001fa1\u001e\n003@ \u001f0123456789X\u001e012A \u001fa2\u001e\n" diff --git a/pica-toolkit/tests/snapshot/cat/021-cat-unique-strategy-config3.in/Pica.toml b/pica-toolkit/tests/snapshot/cat/021-cat-unique-strategy-config3.in/Pica.toml new file mode 100644 index 000000000..4c1307c3f --- /dev/null +++ b/pica-toolkit/tests/snapshot/cat/021-cat-unique-strategy-config3.in/Pica.toml @@ -0,0 +1,2 @@ +[cat] +unique-strategy = "idn" diff --git a/pica-toolkit/tests/snapshot/cat/021-cat-unique-strategy-config3.toml b/pica-toolkit/tests/snapshot/cat/021-cat-unique-strategy-config3.toml new file mode 100644 index 000000000..7802a7b9d --- /dev/null +++ b/pica-toolkit/tests/snapshot/cat/021-cat-unique-strategy-config3.toml @@ -0,0 +1,6 @@ +bin.name = "pica" +args = "-c Pica.toml cat --unique --unique-strategy idn" +status = "success" +stderr = "" +stdin = "003@ \u001f0123456789X\u001e012A \u001fa1\u001e\n003@ \u001f0123456789X\u001e012A \u001fa2\u001e\n" +stdout = "003@ \u001f0123456789X\u001e012A \u001fa1\u001e\n"