Skip to content

Commit

Permalink
Merge pull request #20 from onozaty/develop/v1.13.0
Browse files Browse the repository at this point in the history
Develop v1.13.0
  • Loading branch information
onozaty authored Jan 13, 2022
2 parents 886c38f + 638ee55 commit 9ba3f5d
Show file tree
Hide file tree
Showing 4 changed files with 425 additions and 0 deletions.
61 changes: 61 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
* [concat](#concat) Concat CSV files.
* [count](#count) Count the number of records.
* [exclude](#exclude) Exclude rows that are included in another CSV file.
* [group](#group) Aggregate by group.
* [filter](#filter) Filter rows by condition.
* [head](#head) Show the first few rows.
* [header](#header) Show header.
Expand Down Expand Up @@ -349,6 +350,66 @@ col1,col2
4,D
```

## group

Group by the value of the specified column and perform aggregation.

Currently, only counting is supported.
It's like `GROUP BY` + `COUNT` in SQL.

### Usage

```
csvt group -i INPUT -c COLUMN [--count-column COUNT_COLUMN] -o OUTPUT
```

```
Usage:
csvt group [flags]
Flags:
-i, --input string Input CSV file path.
-c, --column string Name of the column to use for grouping.
--count-column string (optional) Column name for the number of records. (default "COUNT")
-o, --output string Output CSV file path.
-h, --help help for group
```

### Example

The contents of `input.csv`.

```
col1,col2
1,B
2,B
3,A
4,D
5,C
6,D
7,D
8,E
9,A
10,D
```

Group the rows by the value of `col2` and aggregate the number of rows.

```
$ csvt group -i input.csv -c col2 -o output.csv
```

The contents of the created `output.csv`.

```
col2,COUNT
A,2
B,2
C,1
D,4
E,1
```

## filter

Create a new CSV file by filtering the input CSV file to rows that match the conditions.
Expand Down
115 changes: 115 additions & 0 deletions cmd/group.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
package cmd

import (
"io"
_sort "sort"
"strconv"

"github.com/onozaty/csvt/csv"
"github.com/pkg/errors"
"github.com/spf13/cobra"
)

// newGroupCmd builds the "group" subcommand.
//
// The command takes the flags --input/-i, --column/-c and --output/-o
// (all marked as required) plus the optional --count-column (default
// "COUNT"), and hands them to runGroupCount together with the CSV format
// returned by getFlagBaseCsvFormat.
func newGroupCmd() *cobra.Command {

	run := func(cmd *cobra.Command, args []string) error {
		flags := cmd.Flags()

		format, err := getFlagBaseCsvFormat(flags)
		if err != nil {
			return err
		}

		inputPath, _ := flags.GetString("input")
		targetColumnName, _ := flags.GetString("column")
		outputPath, _ := flags.GetString("output")
		countColumnName, _ := flags.GetString("count-column")

		// Once the arguments have been parsed successfully, do not print
		// the usage text even if a later step fails.
		cmd.SilenceUsage = true

		return runGroupCount(format, inputPath, targetColumnName, countColumnName, outputPath)
	}

	groupCmd := &cobra.Command{
		Use:   "group",
		Short: "Aggregate by group",
		RunE:  run,
	}

	// Registration order is kept as input, column, count-column, output.
	flags := groupCmd.Flags()
	flags.StringP("input", "i", "", "Input CSV file path.")
	flags.StringP("column", "c", "", "Name of the column to use for grouping.")
	flags.String("count-column", "COUNT", "(optional) Column name for the number of records.")
	flags.StringP("output", "o", "", "Output CSV file path.")

	for _, required := range []string{"input", "column", "output"} {
		groupCmd.MarkFlagRequired(required)
	}

	return groupCmd
}

// runGroupCount obtains a reader and a writer for inputPath and outputPath
// from setupInputOutput, runs groupCount over them, and finally flushes the
// writer. The cleanup function returned by setupInputOutput is deferred, so
// it runs on every return path after setup succeeds.
//
// It returns the first error reported by setup, groupCount, or the flush.
func runGroupCount(format csv.Format, inputPath string, targetColumnName string, countColumnName string, outputPath string) error {

	reader, writer, closeAll, err := setupInputOutput(inputPath, outputPath, format)
	if err != nil {
		return err
	}
	defer closeAll()

	if err := groupCount(reader, targetColumnName, countColumnName, writer); err != nil {
		return err
	}

	return writer.Flush()
}

// groupCount counts how many records share each distinct value of the column
// named targetColumnName and writes the result to writer.
//
// The first record read from reader is treated as the header and is used to
// locate the target column via getTargetColumnIndex. The output consists of a
// header row {targetColumnName, countColumnName} followed by one row per
// distinct value, ordered by sort.Strings, holding the value and its count.
//
// Read failures are wrapped with "failed to read the CSV file"; errors from
// getTargetColumnIndex and from writer.Write are returned unchanged.
func groupCount(reader csv.CsvReader, targetColumnName string, countColumnName string, writer csv.CsvWriter) error {

	// The first record is the header row.
	header, err := reader.Read()
	if err != nil {
		return errors.Wrap(err, "failed to read the CSV file")
	}

	groupIndex, err := getTargetColumnIndex(header, targetColumnName)
	if err != nil {
		return err
	}

	// Tally the occurrences of each value until the reader reports EOF.
	counts := make(map[string]int)
	for {
		record, readErr := reader.Read()
		if readErr != nil {
			if readErr == io.EOF {
				break
			}
			return errors.Wrap(readErr, "failed to read the CSV file")
		}

		counts[record[groupIndex]]++
	}

	if err := writer.Write([]string{targetColumnName, countColumnName}); err != nil {
		return err
	}

	// Emit the groups sorted by their value so the output order is stable
	// (map iteration order is not).
	values := make([]string, 0, len(counts))
	for value := range counts {
		values = append(values, value)
	}
	_sort.Strings(values)

	for _, value := range values {
		line := []string{value, strconv.Itoa(counts[value])}
		if err := writer.Write(line); err != nil {
			return err
		}
	}

	return nil
}
Loading

0 comments on commit 9ba3f5d

Please sign in to comment.