Skip to content

Commit

Permalink
Merge pull request #16 from onozaty/develop/v1.10.0
Browse files Browse the repository at this point in the history
Develop v1.10.0
  • Loading branch information
onozaty authored Jul 19, 2021
2 parents 2ec9a47 + 8eb9033 commit c046bf3
Show file tree
Hide file tree
Showing 8 changed files with 1,607 additions and 48 deletions.
84 changes: 84 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
* [remove](#remove) Remove columns.
* [rename](#rename) Rename columns.
* [replace](#replace) Replace values.
* [sort](#sort) Sort rows.
* [slice](#slice) Slice specified range of rows.
* [transform](#transform) Transform format.
* [unique](#unique) Extract unique rows.
Expand Down Expand Up @@ -813,6 +814,89 @@ Please refer to the following for the syntax of regular expressions.

* https://golang.org/pkg/regexp/syntax/

## sort

Creates a new CSV file from the input CSV file by sorting by the values in the specified columns.

### Usage

```
csvt sort -i INPUT -c COLUMN1 ... [--desc] [--number] -o OUTPUT [--usingfile]
```

```
Usage:
csvt sort [flags]
Flags:
-i, --input string Input CSV file path.
-c, --column stringArray Name of the column to use for sorting.
--desc (optional) Sort in descending order. The default is ascending order.
--number (optional) Sorts as a number. The default is to sort as a string.
-o, --output string Output CSV file path.
--usingfile (optional) Use temporary files for sorting. Use this when sorting large files that will not fit in memory.
-h, --help help for sort
```

### Example

The contents of `input.csv`.

```
col1,col2
02,a
10,b
01,a
11,c
20,b
```

Sort by "col1".

```
$ csvt sort -i input.csv -c col1 -o output.csv
```

The contents of the created `output.csv`.

```
col1,col2
01,a
02,a
10,b
11,c
20,b
```

By default, values are sorted as strings.
For example, the result could look like this:

```
col1
1
12
123
2
21
3
```

If you want to sort as a number, specify `--number`.

```
$ csvt sort -i input.csv -c col1 --number -o output.csv
```

```
col1
1
2
3
12
21
123
```

## slice

Create a new CSV file by slicing the specified range of rows from the input CSV file.
Expand Down
1 change: 1 addition & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ func newRootCmd() *cobra.Command {
rootCmd.AddCommand(newConcatCmd())
rootCmd.AddCommand(newSliceCmd())
rootCmd.AddCommand(newAddCmd())
rootCmd.AddCommand(newSortCmd())

for _, c := range rootCmd.Commands() {
// フラグ以外は受け付けないように
Expand Down
123 changes: 123 additions & 0 deletions cmd/sort.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package cmd

import (
"github.com/onozaty/csvt/csv"
"github.com/spf13/cobra"
)

// newSortCmd builds the "sort" subcommand, which reads the CSV given by
// --input, sorts its rows by the columns given with --column, and writes the
// result to --output.
func newSortCmd() *cobra.Command {

	// handler is invoked by cobra once the command line has been parsed.
	handler := func(cmd *cobra.Command, args []string) error {

		flagSet := cmd.Flags()

		format, err := getFlagBaseCsvFormat(flagSet)
		if err != nil {
			return err
		}

		// Lookup errors are discarded: every flag read here is registered
		// below with the matching type.
		inputPath, _ := flagSet.GetString("input")
		targetColumnNames, _ := flagSet.GetStringArray("column")
		descending, _ := flagSet.GetBool("desc")
		numeric, _ := flagSet.GetBool("number")
		fileBacked, _ := flagSet.GetBool("usingfile")
		outputPath, _ := flagSet.GetString("output")

		// Argument parsing has succeeded at this point, so do not print the
		// usage text even if a later step returns an error.
		cmd.SilenceUsage = true

		options := SortOptions{
			sortDescending: descending,
			asNumber:       numeric,
			useFileRows:    fileBacked,
		}

		return runSort(format, inputPath, targetColumnNames, outputPath, options)
	}

	command := &cobra.Command{
		Use:   "sort",
		Short: "Sort rows",
		RunE:  handler,
	}

	// Each required flag is marked right after it is defined; the returned
	// errors are intentionally ignored.
	definitions := command.Flags()
	definitions.StringP("input", "i", "", "Input CSV file path.")
	command.MarkFlagRequired("input")
	definitions.StringArrayP("column", "c", []string{}, "Name of the column to use for sorting.")
	command.MarkFlagRequired("column")
	definitions.BoolP("desc", "", false, "(optional) Sort in descending order. The default is ascending order.")
	definitions.BoolP("number", "", false, "(optional) Sorts as a number. The default is to sort as a string.")
	definitions.StringP("output", "o", "", "Output CSV file path.")
	command.MarkFlagRequired("output")
	definitions.BoolP("usingfile", "", false, "(optional) Use temporary files for sorting. Use this when sorting large files that will not fit in memory.")

	return command
}

// SortOptions carries the optional switches of the sort command
// (--desc, --number and --usingfile).
type SortOptions struct {
// sortDescending wraps the comparator with csv.Descending when true (--desc).
sortDescending bool
// asNumber selects csv.CompareNumber instead of csv.CompareString (--number).
asNumber bool
// useFileRows loads rows with csv.LoadCsvFileSortedRows instead of
// csv.LoadCsvMemorySortedRows (--usingfile).
useFileRows bool
}

// runSort sets up the reader for inputPath and the writer for outputPath via
// setupInputOutput, sorts the rows by targetColumnNames according to options,
// and flushes the writer once sorting has succeeded.
//
// The first error from setup, sorting, or flushing is returned as is. The
// cleanup function returned by setupInputOutput always runs on return.
func runSort(format csv.Format, inputPath string, targetColumnNames []string, outputPath string, options SortOptions) error {

	reader, writer, cleanup, err := setupInputOutput(inputPath, outputPath, format)
	if err != nil {
		return err
	}
	defer cleanup()

	if sortErr := sort(reader, targetColumnNames, writer, options); sortErr != nil {
		return sortErr
	}

	// Only flush when every row was written successfully.
	return writer.Flush()
}

// sort loads the rows from reader ordered by targetColumnNames and writes the
// column names followed by every row to writer.
//
// Values are compared with csv.CompareNumber when options.asNumber is set and
// with csv.CompareString otherwise; the comparator is wrapped with
// csv.Descending when options.sortDescending is set. options.useFileRows
// chooses csv.LoadCsvFileSortedRows over csv.LoadCsvMemorySortedRows.
// The first error encountered is returned unchanged.
func sort(reader csv.CsvReader, targetColumnNames []string, writer csv.CsvWriter, options SortOptions) error {

	// Pick the base comparator, then optionally reverse it.
	var comparator func(left string, right string) (int, error)
	switch {
	case options.asNumber:
		comparator = csv.CompareNumber
	default:
		comparator = csv.CompareString
	}
	if options.sortDescending {
		comparator = csv.Descending(comparator)
	}

	// Load the sorted rows with the requested backing store.
	var (
		rows    csv.CsvSortedRows
		loadErr error
	)
	switch {
	case options.useFileRows:
		rows, loadErr = csv.LoadCsvFileSortedRows(reader, targetColumnNames, comparator)
	default:
		rows, loadErr = csv.LoadCsvMemorySortedRows(reader, targetColumnNames, comparator)
	}
	if loadErr != nil {
		return loadErr
	}

	// Header line first, then each row in sorted order.
	if headerErr := writer.Write(rows.ColumnNames()); headerErr != nil {
		return headerErr
	}

	for index := 0; index < rows.Count(); index++ {

		record, rowErr := rows.Row(index)
		if rowErr != nil {
			return rowErr
		}

		if writeErr := writer.Write(record); writeErr != nil {
			return writeErr
		}
	}

	return nil
}
Loading

0 comments on commit c046bf3

Please sign in to comment.