From 5eb45b62da0805be975a8a36f4812b61c8cf8c12 Mon Sep 17 00:00:00 2001
From: onozaty <onozaty@gmail.com>
Date: Sun, 9 Jan 2022 19:53:28 +0900
Subject: [PATCH 1/5] =?UTF-8?q?[gcount]=20gcount=E3=81=AE=E5=AE=9F?=
 =?UTF-8?q?=E8=A3=85(=E3=83=86=E3=82=B9=E3=83=88=E3=81=AF=E3=81=BE?=
 =?UTF-8?q?=E3=81=A0)=20#19?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 cmd/gcount.go | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++
 cmd/root.go   |   1 +
 2 files changed, 123 insertions(+)
 create mode 100644 cmd/gcount.go

diff --git a/cmd/gcount.go b/cmd/gcount.go
new file mode 100644
index 0000000..6a7987d
--- /dev/null
+++ b/cmd/gcount.go
@@ -0,0 +1,122 @@
+package cmd
+
+import (
+	"io"
+	_sort "sort"
+	"strconv"
+
+	"github.com/onozaty/csvt/csv"
+	"github.com/pkg/errors"
+	"github.com/spf13/cobra"
+)
+
+func newGcountCmd() *cobra.Command {
+
+	gcountCmd := &cobra.Command{
+		Use:   "gcount",
+		Short: "Count the number of records in each group",
+		RunE: func(cmd *cobra.Command, args []string) error {
+
+			format, err := getFlagBaseCsvFormat(cmd.Flags())
+			if err != nil {
+				return err
+			}
+			// NOTE(review): the lookup errors below are discarded; presumably safe because each flag is registered on this command — confirm.
+			inputPath, _ := cmd.Flags().GetString("input")
+			targetColumnName, _ := cmd.Flags().GetString("column")
+			outputPath, _ := cmd.Flags().GetString("output")
+			countColumnName, _ := cmd.Flags().GetString("count-column")
+
+			// 引数の解析に成功した時点で、エラーが起きてもUsageは表示しない
+			cmd.SilenceUsage = true
+
+			err = runGroupCount(
+				format,
+				inputPath,
+				targetColumnName,
+				countColumnName,
+				outputPath)
+
+			if err != nil {
+				return err
+			}
+
+			cmd.Printf("%d\n", count)
+
+			return nil
+		},
+	}
+
+	gcountCmd.Flags().StringP("input", "i", "", "Input CSV file path.")
+	gcountCmd.MarkFlagRequired("input")
+	gcountCmd.Flags().StringP("column", "c", "", "Name of the column to use for grouping.")
+	gcountCmd.Flags().StringP("count-column", "", "COUNT", "(optional) Column name for the number of records.")
+	gcountCmd.Flags().StringP("output", "o", "", "Output CSV file path.")
+	gcountCmd.MarkFlagRequired("output")
+
+	return gcountCmd
+}
+
+func runGroupCount(format csv.Format, inputPath string, targetColumnName string, countColumnName string, outputPath string) error {
+	// Passes the reader and writer obtained from setupInputOutput to groupCount, then flushes the writer.
+	reader, writer, close, err := setupInputOutput(inputPath, outputPath, format)
+	if err != nil {
+		return err
+	}
+	defer close() // NOTE(review): this local shadows the builtin close inside the function.
+
+	err = groupCount(reader, targetColumnName, countColumnName, writer)
+	if err != nil {
+		return err
+	}
+
+	return writer.Flush()
+}
+
+func groupCount(reader csv.CsvReader, targetColumnName string, countColumnName string, writer csv.CsvWriter) error {
+	// Tallies the rows for each distinct value of the target column and writes
+	// the tallies sorted by value. The first record read is the header.
+	columnNames, err := reader.Read()
+	if err != nil {
+		return errors.Wrap(err, "failed to read the CSV file")
+	}
+
+	targetColumnIndex, err := getTargetColumnIndex(columnNames, targetColumnName)
+	if err != nil {
+		return err
+	}
+
+	counter := map[string]int{} // group value -> number of rows
+
+	for {
+		row, err := reader.Read()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return errors.Wrap(err, "failed to read the CSV file")
+		}
+		// NOTE(review): assumes every row has more than targetColumnIndex fields — confirm the reader enforces this.
+		val := row[targetColumnIndex]
+		counter[val] = counter[val] + 1 // a missing key reads as 0
+	}
+	// Output header: the grouping column name followed by the count column name.
+	if err := writer.Write([]string{targetColumnName, countColumnName}); err != nil {
+		return err
+	}
+
+	// Emit the groups sorted by their value.
+	keys := []string{}
+	for k := range counter {
+		keys = append(keys, k)
+	}
+	_sort.Strings(keys)
+
+	for _, k := range keys {
+		if err := writer.Write([]string{k, strconv.Itoa(counter[k])}); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
diff --git a/cmd/root.go b/cmd/root.go
index 62443a9..47098d3 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -44,6 +44,7 @@ func newRootCmd() *cobra.Command {
 	rootCmd.AddCommand(newSortCmd())
 	rootCmd.AddCommand(newSplitCmd())
 	rootCmd.AddCommand(newHeadCmd())
+	rootCmd.AddCommand(newGcountCmd())
 
 	for _, c := range rootCmd.Commands() {
 		// フラグ以外は受け付けないように

From aa7662dea5908f4499f3e72bffcd58c30d6c3ade Mon Sep 17 00:00:00 2001
From: onozaty <onozaty@gmail.com>
Date: Sun, 9 Jan 2022 20:58:49 +0900
Subject: [PATCH 2/5] =?UTF-8?q?[gcount]=20=E5=8B=95=E4=BD=9C=E7=A2=BA?=
 =?UTF-8?q?=E8=AA=8D=E3=81=A7=E5=8B=95=E3=81=8B=E3=81=AA=E3=81=8B=E3=81=A3?=
 =?UTF-8?q?=E3=81=9F=E9=83=A8=E5=88=86=E3=82=92=E4=BF=AE=E6=AD=A3=20#19?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 cmd/gcount.go | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/cmd/gcount.go b/cmd/gcount.go
index 6a7987d..f620103 100644
--- a/cmd/gcount.go
+++ b/cmd/gcount.go
@@ -30,26 +30,19 @@ func newGcountCmd() *cobra.Command {
 			// 引数の解析に成功した時点で、エラーが起きてもUsageは表示しない
 			cmd.SilenceUsage = true
 
-			err = runGroupCount(
+			return runGroupCount(
 				format,
 				inputPath,
 				targetColumnName,
 				countColumnName,
 				outputPath)
-
-			if err != nil {
-				return err
-			}
-
-			cmd.Printf("%d\n", count)
-
-			return nil
 		},
 	}
 
 	gcountCmd.Flags().StringP("input", "i", "", "Input CSV file path.")
 	gcountCmd.MarkFlagRequired("input")
 	gcountCmd.Flags().StringP("column", "c", "", "Name of the column to use for grouping.")
+	gcountCmd.MarkFlagRequired("column")
 	gcountCmd.Flags().StringP("count-column", "", "COUNT", "(optional) Column name for the number of records.")
 	gcountCmd.Flags().StringP("output", "o", "", "Output CSV file path.")
 	gcountCmd.MarkFlagRequired("output")

From 75c3df2895d327064ac3a3bd3585ac3c72dcf8b2 Mon Sep 17 00:00:00 2001
From: onozaty <onozaty@gmail.com>
Date: Mon, 10 Jan 2022 23:01:39 +0900
Subject: [PATCH 3/5] =?UTF-8?q?[gcount]=20=E3=83=86=E3=82=B9=E3=83=88?=
 =?UTF-8?q?=E8=BF=BD=E5=8A=A0=20#19?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 cmd/gcount_test.go | 248 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 248 insertions(+)
 create mode 100644 cmd/gcount_test.go

diff --git a/cmd/gcount_test.go b/cmd/gcount_test.go
new file mode 100644
index 0000000..711ad2c
--- /dev/null
+++ b/cmd/gcount_test.go
@@ -0,0 +1,248 @@
+package cmd
+
+import (
+	"os"
+	"testing"
+)
+
+func TestGcountCmd(t *testing.T) {
+
+	s := joinRows(
+		"col1,col2",
+		"1,B",
+		"2,A",
+		"3,a",
+		"4,A",
+		"5,C",
+		"6,C",
+		"7,A",
+		"8,AA",
+		"9,B",
+		"10,", // empty value in the grouping column
+	)
+
+	fi := createTempFile(t, s)
+	defer os.Remove(fi)
+
+	fo := createTempFile(t, "")
+	defer os.Remove(fo)
+
+	rootCmd := newRootCmd()
+	rootCmd.SetArgs([]string{
+		"gcount",
+		"-i", fi,
+		"-o", fo,
+		"-c", "col2",
+	})
+
+	err := rootCmd.Execute()
+	if err != nil {
+		t.Fatal("failed test\n", err)
+	}
+
+	result := readString(t, fo)
+
+	expect := joinRows(
+		"col2,COUNT",
+		",1", // the empty value forms its own group and is listed first
+		"A,3",
+		"AA,1",
+		"B,2",
+		"C,2",
+		"a,1", // lowercase "a" is counted separately from "A" and is listed after the uppercase values
+	)
+
+	if result != expect {
+		t.Fatal("failed test\n", result)
+	}
+}
+
+func TestGcountCmd_countColumn(t *testing.T) {
+
+	s := joinRows(
+		"col1,col2",
+		"1,A",
+		"2,A",
+		"3,A",
+	)
+
+	fi := createTempFile(t, s)
+	defer os.Remove(fi)
+
+	fo := createTempFile(t, "")
+	defer os.Remove(fo)
+
+	rootCmd := newRootCmd()
+	rootCmd.SetArgs([]string{
+		"gcount",
+		"-i", fi,
+		"-o", fo,
+		"-c", "col2",
+		"--count-column", "count", // explicit name for the count column in the output header
+	})
+
+	err := rootCmd.Execute()
+	if err != nil {
+		t.Fatal("failed test\n", err)
+	}
+
+	result := readString(t, fo)
+
+	expect := joinRows(
+		"col2,count", // the header carries the name passed via --count-column
+		"A,3",
+	)
+
+	if result != expect {
+		t.Fatal("failed test\n", result)
+	}
+}
+
+func TestGcountCmd_format(t *testing.T) {
+
+	s := joinRows(
+		"col1\tcol2",
+		"1\ta",
+		"2\tb",
+		"3\ta",
+		"4\tb",
+	)
+
+	fi := createTempFile(t, s)
+	defer os.Remove(fi)
+
+	fo := createTempFile(t, "")
+	defer os.Remove(fo)
+
+	rootCmd := newRootCmd()
+	rootCmd.SetArgs([]string{
+		"gcount",
+		"-i", fi,
+		"-o", fo,
+		"-c", "col2",
+		"--delim", `\t`, // raw string: a backslash followed by "t", not a tab character
+	})
+
+	err := rootCmd.Execute()
+	if err != nil {
+		t.Fatal("failed test\n", err)
+	}
+
+	result := readString(t, fo)
+
+	expect := joinRows(
+		"col2\tCOUNT", // the output is expected to be tab-delimited as well
+		"a\t2",
+		"b\t2",
+	)
+
+	if result != expect {
+		t.Fatal("failed test\n", result)
+	}
+}
+
+func TestGcountCmd_invalidFormat(t *testing.T) {
+
+	s := joinRows(
+		"col1,col2",
+		"1,1",
+	)
+
+	fi := createTempFile(t, s)
+	defer os.Remove(fi)
+
+	fo := createTempFile(t, "")
+	defer os.Remove(fo)
+
+	rootCmd := newRootCmd()
+	rootCmd.SetArgs([]string{
+		"gcount",
+		"-i", fi,
+		"-o", fo,
+		"-c", "col1",
+		"--delim", "xx", // two characters, so the single-character rule asserted below must reject it
+	})
+
+	err := rootCmd.Execute()
+	if err == nil || err.Error() != "flag delim should be specified with a single character" {
+		t.Fatal("failed test\n", err)
+	}
+}
+
+func TestGcountCmd_columnNotFound(t *testing.T) {
+
+	s := joinRows(
+		"col1,col2",
+		"1,A",
+		"2,A",
+		"3,A",
+	)
+
+	fi := createTempFile(t, s)
+	defer os.Remove(fi)
+
+	fo := createTempFile(t, "")
+	defer os.Remove(fo)
+
+	rootCmd := newRootCmd()
+	rootCmd.SetArgs([]string{
+		"gcount",
+		"-i", fi,
+		"-o", fo,
+		"-c", "col3",
+	})
+	// col3 does not appear in the header, so the command must fail.
+	err := rootCmd.Execute()
+	if err == nil || err.Error() != "missing col3 in the CSV file" {
+		t.Fatal("failed test\n", err)
+	}
+}
+
+func TestGcountCmd_inputFileNotFound(t *testing.T) {
+
+	fi := createTempFile(t, "")
+	defer os.Remove(fi)
+
+	fo := createTempFile(t, "")
+	defer os.Remove(fo)
+
+	rootCmd := newRootCmd()
+	rootCmd.SetArgs([]string{
+		"gcount",
+		"-i", fi + "____", // 存在しないファイル
+		"-o", fo,
+		"-c", "col1",
+	})
+
+	err := rootCmd.Execute()
+	if err == nil {
+		t.Fatal("failed test\n", err)
+	}
+	// Checked assertion: an unexpected error type fails the test instead of panicking.
+	pathErr, ok := err.(*os.PathError)
+	if !ok || pathErr.Path != fi+"____" || pathErr.Op != "open" {
+		t.Fatal("failed test\n", err)
+	}
+}
+
+func TestGcountCmd_inputFileEmpty(t *testing.T) {
+
+	fi := createTempFile(t, "")
+	defer os.Remove(fi)
+
+	fo := createTempFile(t, "")
+	defer os.Remove(fo)
+
+	rootCmd := newRootCmd()
+	rootCmd.SetArgs([]string{
+		"gcount",
+		"-i", fi,
+		"-o", fo,
+		"-c", "col1",
+	})
+	// The input is empty, so there is no header row and the command must report EOF.
+	err := rootCmd.Execute()
+	if err == nil || err.Error() != "failed to read the CSV file: EOF" {
+		t.Fatal("failed test\n", err)
+	}
+}

From 3353c9f87f9ab28a5701d240c64786c7fe7f5dcd Mon Sep 17 00:00:00 2001
From: onozaty <onozaty@gmail.com>
Date: Wed, 12 Jan 2022 00:24:11 +0900
Subject: [PATCH 4/5] =?UTF-8?q?[group]=20=E3=82=B5=E3=83=96=E3=82=B3?=
 =?UTF-8?q?=E3=83=9E=E3=83=B3=E3=83=89=E5=90=8D=E3=82=92gcount->group?=
 =?UTF-8?q?=E3=81=AB=E5=A4=89=E6=9B=B4=20#19?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 cmd/{gcount.go => group.go}           |  6 +++---
 cmd/{gcount_test.go => group_test.go} | 28 +++++++++++++--------------
 cmd/root.go                           |  2 +-
 3 files changed, 18 insertions(+), 18 deletions(-)
 rename cmd/{gcount.go => group.go} (96%)
 rename cmd/{gcount_test.go => group_test.go} (88%)

diff --git a/cmd/gcount.go b/cmd/group.go
similarity index 96%
rename from cmd/gcount.go
rename to cmd/group.go
index f620103..31d3fa8 100644
--- a/cmd/gcount.go
+++ b/cmd/group.go
@@ -10,11 +10,11 @@ import (
 	"github.com/spf13/cobra"
 )
 
-func newGcountCmd() *cobra.Command {
+func newGroupCmd() *cobra.Command {
 
 	gcountCmd := &cobra.Command{
-		Use:   "gcount",
-		Short: "Count the number of records in each group",
+		Use:   "group",
+		Short: "Aggregate by group",
 		RunE: func(cmd *cobra.Command, args []string) error {
 
 			format, err := getFlagBaseCsvFormat(cmd.Flags())
diff --git a/cmd/gcount_test.go b/cmd/group_test.go
similarity index 88%
rename from cmd/gcount_test.go
rename to cmd/group_test.go
index 711ad2c..f523120 100644
--- a/cmd/gcount_test.go
+++ b/cmd/group_test.go
@@ -5,7 +5,7 @@ import (
 	"testing"
 )
 
-func TestGcountCmd(t *testing.T) {
+func TestGroupCmd(t *testing.T) {
 
 	s := joinRows(
 		"col1,col2",
@@ -29,7 +29,7 @@ func TestGcountCmd(t *testing.T) {
 
 	rootCmd := newRootCmd()
 	rootCmd.SetArgs([]string{
-		"gcount",
+		"group",
 		"-i", fi,
 		"-o", fo,
 		"-c", "col2",
@@ -57,7 +57,7 @@ func TestGcountCmd(t *testing.T) {
 	}
 }
 
-func TestGcountCmd_countColumn(t *testing.T) {
+func TestGroupCmd_countColumn(t *testing.T) {
 
 	s := joinRows(
 		"col1,col2",
@@ -74,7 +74,7 @@ func TestGcountCmd_countColumn(t *testing.T) {
 
 	rootCmd := newRootCmd()
 	rootCmd.SetArgs([]string{
-		"gcount",
+		"group",
 		"-i", fi,
 		"-o", fo,
 		"-c", "col2",
@@ -98,7 +98,7 @@ func TestGcountCmd_countColumn(t *testing.T) {
 	}
 }
 
-func TestGcountCmd_format(t *testing.T) {
+func TestGroupCmd_format(t *testing.T) {
 
 	s := joinRows(
 		"col1\tcol2",
@@ -116,7 +116,7 @@ func TestGcountCmd_format(t *testing.T) {
 
 	rootCmd := newRootCmd()
 	rootCmd.SetArgs([]string{
-		"gcount",
+		"group",
 		"-i", fi,
 		"-o", fo,
 		"-c", "col2",
@@ -141,7 +141,7 @@ func TestGcountCmd_format(t *testing.T) {
 	}
 }
 
-func TestGcountCmd_invalidFormat(t *testing.T) {
+func TestGroupCmd_invalidFormat(t *testing.T) {
 
 	s := joinRows(
 		"col1,col2",
@@ -156,7 +156,7 @@ func TestGcountCmd_invalidFormat(t *testing.T) {
 
 	rootCmd := newRootCmd()
 	rootCmd.SetArgs([]string{
-		"gcount",
+		"group",
 		"-i", fi,
 		"-o", fo,
 		"-c", "col1",
@@ -169,7 +169,7 @@ func TestGcountCmd_invalidFormat(t *testing.T) {
 	}
 }
 
-func TestGcountCmd_columnNotFound(t *testing.T) {
+func TestGroupCmd_columnNotFound(t *testing.T) {
 
 	s := joinRows(
 		"col1,col2",
@@ -186,7 +186,7 @@ func TestGcountCmd_columnNotFound(t *testing.T) {
 
 	rootCmd := newRootCmd()
 	rootCmd.SetArgs([]string{
-		"gcount",
+		"group",
 		"-i", fi,
 		"-o", fo,
 		"-c", "col3",
@@ -198,7 +198,7 @@ func TestGcountCmd_columnNotFound(t *testing.T) {
 	}
 }
 
-func TestGcountCmd_inputFileNotFound(t *testing.T) {
+func TestGroupCmd_inputFileNotFound(t *testing.T) {
 
 	fi := createTempFile(t, "")
 	defer os.Remove(fi)
@@ -208,7 +208,7 @@ func TestGcountCmd_inputFileNotFound(t *testing.T) {
 
 	rootCmd := newRootCmd()
 	rootCmd.SetArgs([]string{
-		"gcount",
+		"group",
 		"-i", fi + "____", // 存在しないファイル
 		"-o", fo,
 		"-c", "col1",
@@ -225,7 +225,7 @@ func TestGcountCmd_inputFileNotFound(t *testing.T) {
 	}
 }
 
-func TestGcountCmd_inputFileEmpty(t *testing.T) {
+func TestGroupCmd_inputFileEmpty(t *testing.T) {
 
 	fi := createTempFile(t, "")
 	defer os.Remove(fi)
@@ -235,7 +235,7 @@ func TestGcountCmd_inputFileEmpty(t *testing.T) {
 
 	rootCmd := newRootCmd()
 	rootCmd.SetArgs([]string{
-		"gcount",
+		"group",
 		"-i", fi,
 		"-o", fo,
 		"-c", "col1",
diff --git a/cmd/root.go b/cmd/root.go
index 47098d3..5e98421 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -44,7 +44,7 @@ func newRootCmd() *cobra.Command {
 	rootCmd.AddCommand(newSortCmd())
 	rootCmd.AddCommand(newSplitCmd())
 	rootCmd.AddCommand(newHeadCmd())
-	rootCmd.AddCommand(newGcountCmd())
+	rootCmd.AddCommand(newGroupCmd())
 
 	for _, c := range rootCmd.Commands() {
 		// フラグ以外は受け付けないように

From 638ee55a0626b36408aeb4248e8c26ad2850dea7 Mon Sep 17 00:00:00 2001
From: onozaty <onozaty@gmail.com>
Date: Wed, 12 Jan 2022 09:03:32 +0900
Subject: [PATCH 5/5] =?UTF-8?q?[group]=20README=E3=81=AB=E8=AA=AC=E6=98=8E?=
 =?UTF-8?q?=E8=BF=BD=E5=8A=A0=20#19?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/README.md b/README.md
index ab66fa2..707e543 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@
 * [concat](#concat) Concat CSV files.
 * [count](#count) Count the number of records.
 * [exclude](#exclude) Exclude rows by included in another CSV file.
+* [group](#group) Aggregate by group.
 * [filter](#filter) Filter rows by condition.
 * [head](#head) Show head few rows.
 * [header](#header) Show header.
@@ -349,6 +350,66 @@ col1,col2
 4,D
 ```
 
+## group
+
+Group by the value of the specified column and perform aggregation.  
+
+Currently, only counting is supported.  
+It's like `GROUP BY` + `COUNT` in SQL.
+
+### Usage
+
+```
+csvt group -i INPUT -c COLUMN [--count-column COUNT_COLUMN] -o OUTPUT
+```
+
+```
+Usage:
+  csvt group [flags]
+
+Flags:
+  -i, --input string          Input CSV file path.
+  -c, --column string         Name of the column to use for grouping.
+      --count-column string   (optional) Column name for the number of records. (default "COUNT")
+  -o, --output string         Output CSV file path.
+  -h, --help                  help for group
+```
+
+### Example
+
+The contents of `input.csv`.
+
+```
+col1,col2
+1,B
+2,B
+3,A
+4,D
+5,C
+6,D
+7,D
+8,E
+9,A
+10,D
+```
+
+Group the rows by the value of `col2` and count the rows in each group.
+
+```
+$ csvt group -i input.csv -c col2 -o output.csv
+```
+
+The contents of the created `output.csv`.
+
+```
+col2,COUNT
+A,2
+B,2
+C,1
+D,4
+E,1
+```
+
 ## filter
 
 Create a new CSV file by filtering the input CSV file to rows that match the conditions.