Skip to content

Commit

Permalink
Add unique-strategy config option (#658)
Browse files Browse the repository at this point in the history
  • Loading branch information
nwagner84 authored Jul 12, 2023
1 parent 3d43bee commit ffbd462
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* #641 Stabilize `sample` command
* #642 Add `--squash` and `--merge` option
* #644 Add `!^` and `!$` operator
* #658 Add unique-strategy config option (`cat` command)

### Changed

Expand Down
35 changes: 28 additions & 7 deletions pica-toolkit/src/commands/cat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,28 @@ pub(crate) struct CatConfig {
/// Skip invalid records that can't be decoded.
pub(crate) skip_invalid: Option<bool>,

/// Strategy to determine duplicate records.
pub(crate) unique_strategy: Option<Strategy>,

/// Compress output in gzip format
pub(crate) gzip: Option<bool>,
}

#[derive(Clone, Debug, PartialEq, Eq, Default, ValueEnum)]
enum Strategy {
// Strategy used to decide whether two records count as duplicates when the
// `cat` command de-duplicates its input (`--unique`).
//
// The value can come from the command line (`--unique-strategy`, via the
// `ValueEnum` derive) or from the `[cat]` section of the config file (via
// the `Deserialize` derive, key `unique-strategy`).
//
// NOTE(review): plain `//` comments are used here on purpose; clap's derive
// may surface `///` text on variants as help output -- confirm before
// converting these to doc comments.
#[derive(
Clone,
Debug,
PartialEq,
Eq,
Default,
ValueEnum,
Deserialize,
Serialize,
)]
pub(crate) enum Strategy {
// Default strategy: the de-duplication key is the record's IDN
// (`record.idn()`), so two records sharing an IDN are duplicates even
// if their remaining fields differ.
#[default]
// Spelled "idn" in the config file.
#[serde(rename = "idn")]
Idn,
// Presumably keys on a hash of the record content, so records with the
// same IDN but different fields are both kept (the "hash" test fixture
// shows this outcome); the matching arm is not visible in this hunk.
// Spelled "hash" in the config file.
#[serde(rename = "hash")]
Hash,
}

Expand All @@ -52,12 +66,10 @@ pub(crate) struct Cat {
#[arg(
long,
requires = "unique",
default_value = "idn",
value_name = "strategy",
hide_possible_values = true,
hide_default_value = true
hide_possible_values = true
)]
unique_strategy: Strategy,
unique_strategy: Option<Strategy>,

/// Append to the given file, do not overwrite
#[arg(long)]
Expand Down Expand Up @@ -91,9 +103,18 @@ impl Cat {
config.global
);

let unique_strategy =
if let Some(strategy) = self.unique_strategy {
strategy
} else if let Some(ref config) = config.cat {
config.unique_strategy.clone().unwrap_or_default()
} else {
Strategy::default()
};

let mut seen = BTreeSet::new();
let key = |record: &ByteRecord| -> String {
match self.unique_strategy {
match unique_strategy {
Strategy::Idn => record
.idn()
.map(ToString::to_string)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[cat]
unique-strategy = "idn"
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
bin.name = "pica"
args = "-c Pica.toml cat --unique"
status = "success"
stderr = ""
stdin = "003@ \u001f0123456789X\u001e012A \u001fa1\u001e\n003@ \u001f0123456789X\u001e012A \u001fa2\u001e\n"
stdout = "003@ \u001f0123456789X\u001e012A \u001fa1\u001e\n"
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[cat]
unique-strategy = "hash"
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
bin.name = "pica"
args = "-c Pica.toml cat --unique"
status = "success"
stderr = ""
stdin = "003@ \u001f0123456789X\u001e012A \u001fa1\u001e\n003@ \u001f0123456789X\u001e012A \u001fa2\u001e\n"
stdout = "003@ \u001f0123456789X\u001e012A \u001fa1\u001e\n003@ \u001f0123456789X\u001e012A \u001fa2\u001e\n"
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[cat]
unique-strategy = "idn"
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
bin.name = "pica"
args = "-c Pica.toml cat --unique --unique-strategy idn"
status = "success"
stderr = ""
stdin = "003@ \u001f0123456789X\u001e012A \u001fa1\u001e\n003@ \u001f0123456789X\u001e012A \u001fa2\u001e\n"
stdout = "003@ \u001f0123456789X\u001e012A \u001fa1\u001e\n"

0 comments on commit ffbd462

Please sign in to comment.