This repository was archived by the owner on Nov 16, 2023. It is now read-only.

Commit

Upgrade to Go 1.19 (#298)
* Upgrade to Go 1.19

* gofmt
Pryz authored Aug 9, 2022
1 parent d84ed32 commit f9e00f6
Showing 19 changed files with 66 additions and 81 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/test.yml
@@ -9,6 +9,7 @@ jobs:
go:
- '1.17.x'
- '1.18.x'
+- '1.19.x'
tags:
- ''
- purego
@@ -44,7 +45,7 @@ jobs:
- name: Setup Go ${{ matrix.go }}
uses: actions/setup-go@v3
with:
-go-version: 1.18.x
+go-version: 1.19.x

- name: Validate formatting
run: make format
1 change: 0 additions & 1 deletion bloom/filter.go
@@ -43,7 +43,6 @@ func MakeSplitBlockFilter(data []byte) SplitBlockFilter {
// filters in memory, for example:
//
// f := make(bloom.SplitBlockFilter, bloom.NumSplitBlocksOf(n, 10))
-//
func NumSplitBlocksOf(numValues int64, bitsPerValue uint) int {
numBytes := ((uint(numValues) * bitsPerValue) + 7) / 8
numBlocks := (numBytes + (BlockSize - 1)) / BlockSize
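
A minimal sketch (not part of this commit) of how the two functions in this hunk fit together; the 10 bits-per-value figure comes from the doc example above, and treating bloom.BlockSize as the exported block size in bytes is an assumption based on its use inside NumSplitBlocksOf.

    package main

    import (
        "fmt"

        "github.com/segmentio/parquet-go/bloom"
    )

    func main() {
        const numValues = 1000

        // Size a filter for ~1000 values at 10 bits per value, as in the doc example.
        f := make(bloom.SplitBlockFilter, bloom.NumSplitBlocksOf(numValues, 10))
        fmt.Println("blocks:", len(f))

        // Raw bytes (for example read back from a parquet file) can be viewed
        // as a filter again through the constructor named in the hunk header.
        data := make([]byte, len(f)*bloom.BlockSize)
        g := bloom.MakeSplitBlockFilter(data)
        fmt.Println("blocks:", len(g))
    }
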
1 change: 0 additions & 1 deletion buffer.go
@@ -36,7 +36,6 @@ type Buffer struct {
// buffer := parquet.NewBuffer(config)
// ...
// }
-//
func NewBuffer(options ...RowGroupOption) *Buffer {
config, err := NewRowGroupConfig(options...)
if err != nil {
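
A hedged sketch of the Buffer usage the doc comment above alludes to. That Buffer.Write accepts a single Go value (mirroring the non-generic Writer) and that the *Schema returned by SchemaOf can be passed as a RowGroupOption are both assumptions; neither appears in this hunk.

    package main

    import "github.com/segmentio/parquet-go"

    type Row struct {
        ID   int64  `parquet:"id"`
        Name string `parquet:"name"`
    }

    func main() {
        // NewBuffer validates its RowGroupConfig before building the buffer,
        // as the hunk above shows.
        buffer := parquet.NewBuffer(parquet.SchemaOf(Row{}))

        // Write is assumed to accept one Go value per call.
        if err := buffer.Write(Row{ID: 1, Name: "a"}); err != nil {
            panic(err)
        }
    }
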
5 changes: 2 additions & 3 deletions column_buffer_go18.go
@@ -19,9 +19,8 @@ import (
//
// - rows is the array of Go values to write to the column buffers.
//
-// - levels is used to track the column index, repetition and definition levels
-// of values when writing optional or repeated columns.
-//
+// - levels is used to track the column index, repetition and definition levels
+// of values when writing optional or repeated columns.
type writeRowsFunc func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error

// writeRowsFuncOf generates a writeRowsFunc function for the given Go type and
4 changes: 0 additions & 4 deletions config.go
@@ -28,7 +28,6 @@ const (
// SkipPageIndex: true,
// SkipBloomFilters: true,
// })
-//
type FileConfig struct {
SkipPageIndex bool
SkipBloomFilters bool
@@ -82,7 +81,6 @@ func (c *FileConfig) Validate() error {
// reader := parquet.NewReader(output, schema, &parquet.ReaderConfig{
// // ...
// })
-//
type ReaderConfig struct {
Schema *Schema
}
@@ -131,7 +129,6 @@ func (c *ReaderConfig) Validate() error {
// writer := parquet.NewWriter(output, schema, &parquet.WriterConfig{
// CreatedBy: "my test program",
// })
-//
type WriterConfig struct {
CreatedBy string
ColumnPageBuffers PageBufferPool
@@ -225,7 +222,6 @@ func (c *WriterConfig) Validate() error {
// buffer := parquet.NewBuffer(&parquet.RowGroupConfig{
// ColumnBufferCapacity: 10_000,
// })
-//
type RowGroupConfig struct {
ColumnBufferCapacity int
SortingColumns []SortingColumn
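
The doc comments above already carry usage examples; as one extra illustration, a sketch that builds a WriterConfig through the same option path NewWriter uses (NewWriterConfig appears in the writer.go hunk further down, and the doc example above already passes a *WriterConfig as an option):

    package main

    import (
        "fmt"

        "github.com/segmentio/parquet-go"
    )

    func main() {
        // A *WriterConfig is itself a WriterOption, so it can seed the
        // configuration that NewWriterConfig assembles and validates.
        config, err := parquet.NewWriterConfig(&parquet.WriterConfig{
            CreatedBy: "my test program",
        })
        if err != nil {
            panic(err)
        }
        fmt.Println(config.CreatedBy)
    }
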
26 changes: 13 additions & 13 deletions file.go
@@ -147,19 +147,19 @@ func OpenFile(r io.ReaderAt, size int64, options ...FileOption) (*File, error) {
// Only leaf columns have indexes, the returned indexes are arranged using the
// following layout:
//
-// + -------------- +
-// | col 0: chunk 0 |
-// + -------------- +
-// | col 1: chunk 0 |
-// + -------------- +
-// | ... |
-// + -------------- +
-// | col 0: chunk 1 |
-// + -------------- +
-// | col 1: chunk 1 |
-// + -------------- +
-// | ... |
-// + -------------- +
+// - -------------- +
+// | col 0: chunk 0 |
+// - -------------- +
+// | col 1: chunk 0 |
+// - -------------- +
+// | ... |
+// - -------------- +
+// | col 0: chunk 1 |
+// - -------------- +
+// | col 1: chunk 1 |
+// - -------------- +
+// | ... |
+// - -------------- +
//
// This method is useful in combination with the SkipPageIndex option to delay
// reading the page index section until after the file was opened. Note that in
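
A sketch of the SkipPageIndex workflow described above. The ColumnIndexes method name is an assumption, since the method being documented is cropped out of this hunk; passing a *FileConfig as a FileOption follows the config.go doc example earlier in this diff.

    package main

    import (
        "fmt"
        "os"

        "github.com/segmentio/parquet-go"
    )

    func main() {
        osFile, err := os.Open("rows.parquet") // hypothetical input file
        if err != nil {
            panic(err)
        }
        defer osFile.Close()

        stat, err := osFile.Stat()
        if err != nil {
            panic(err)
        }

        // Skip the page index while opening, then load it on demand later.
        f, err := parquet.OpenFile(osFile, stat.Size(), &parquet.FileConfig{
            SkipPageIndex: true,
        })
        if err != nil {
            panic(err)
        }

        // Assumed accessor: the returned slice is laid out chunk-major, one
        // entry per leaf column per row group, as in the diagram above.
        fmt.Println(len(f.ColumnIndexes()))
    }
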
8 changes: 4 additions & 4 deletions format/parquet.go
@@ -281,11 +281,11 @@ func (t *LogicalType) String() string {

// Represents a element inside a schema definition.
//
-// - if it is a group (inner node) then type is undefined and num_children is
-// defined
+// - if it is a group (inner node) then type is undefined and num_children is
+// defined
//
-// - if it is a primitive type (leaf) then type is defined and num_children is
-// undefined
+// - if it is a primitive type (leaf) then type is defined and num_children is
+// undefined
//
// The nodes are listed in depth first traversal order.
type SchemaElement struct {
2 changes: 1 addition & 1 deletion go.mod
@@ -1,6 +1,6 @@
module github.com/segmentio/parquet-go

-go 1.18
+go 1.19

require (
github.com/andybalholm/brotli v1.0.3
4 changes: 2 additions & 2 deletions hashprobe/hashprobe.go
@@ -149,7 +149,7 @@ func (t *Uint32Table) ProbeArray(keys sparse.Uint32Array, values []int32) int {
//
// The table uses the following memory layout:
//
-// [group 0][group 1][...][group N]
+// [group 0][group 1][...][group N]
//
// Each group contains up to 7 key/value pairs, and is exactly 64 bytes in size,
// which allows it to fit within a single cache line, and ensures that probes
@@ -598,7 +598,7 @@ func (t *Uint128Table) ProbeArray(keys sparse.Uint128Array, values []int32) int
//
// This table uses the following memory layout:
//
-// [key A][key B][...][value A][value B][...]
+// [key A][key B][...][value A][value B][...]
//
// The table stores values as their actual value plus one, and uses zero as a
// sentinel to determine whether a slot is occupied. A linear probing strategy
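
A standalone toy (not the package's code) illustrating the layout described for the 128-bit table above: flat key and value arrays, values stored as value+1 so that zero marks a free slot, and linear probing on collisions.

    package main

    import "fmt"

    // toyTable mimics the described layout: keys and values live in flat
    // arrays, and values are stored as value+1 so that 0 means "empty".
    type toyTable struct {
        keys   []uint64
        values []int32 // value+1; 0 means the slot is free
    }

    func newToyTable(size int) *toyTable {
        return &toyTable{keys: make([]uint64, size), values: make([]int32, size)}
    }

    // probe returns the value already associated with key, or inserts val and
    // returns it. A real table would also grow once a load factor is exceeded.
    func (t *toyTable) probe(key uint64, val int32) int32 {
        i := int(key % uint64(len(t.keys)))
        for {
            if t.values[i] == 0 { // free slot: insert
                t.keys[i], t.values[i] = key, val+1
                return val
            }
            if t.keys[i] == key { // key already present
                return t.values[i] - 1
            }
            i = (i + 1) % len(t.keys) // linear probing
        }
    }

    func main() {
        t := newToyTable(8)
        fmt.Println(t.probe(42, 7)) // 7: inserted
        fmt.Println(t.probe(42, 9)) // 7: key already present, stored value wins
    }
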
2 changes: 0 additions & 2 deletions internal/bytealg/count_amd64.go
@@ -12,7 +12,6 @@ package bytealg
// name old speed new speed delta
// CountByte 49.6GB/s ± 0% 93.2GB/s ± 0% +87.74% (p=0.000 n=10+10)
//
-//
// On systems that do not have AVX-512, the AVX2 version of the code is also
// optimized to make use of multiple register lanes, which gives a bit better
// throughput than the standard library function:
@@ -23,6 +22,5 @@ package bytealg
// name old speed new speed delta
// CountByte 49.6GB/s ± 0% 67.1GB/s ± 0% +35.21% (p=0.000 n=10+10)
//
-//
//go:noescape
func Count(data []byte, value byte) int
3 changes: 1 addition & 2 deletions internal/unsafecast/unsafecast_go18.go
@@ -8,8 +8,7 @@
// casting a [][16]byte to a []byte in order to use functions of the standard
// bytes package on the slices.
//
-// With great power comes great responsibility.
-//
+// With great power comes great responsibility.
package unsafecast

import (
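
A sketch of the kind of conversion the package comment describes, written with unsafe.Slice rather than the package's own helpers (which this hunk does not show):

    package main

    import (
        "bytes"
        "fmt"
        "unsafe"
    )

    func main() {
        blocks := [][16]byte{{1, 2, 3}, {4, 5, 6}}

        // View the [][16]byte as a []byte over the same memory; centralizing
        // casts like this one is what the unsafecast package is for.
        b := unsafe.Slice((*byte)(unsafe.Pointer(&blocks[0])), len(blocks)*16)

        // The standard bytes package can now operate on the data directly.
        fmt.Println(bytes.IndexByte(b, 4)) // 16
    }
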
1 change: 0 additions & 1 deletion page_bounds_amd64.go
@@ -23,7 +23,6 @@ package parquet
// running more AVX-512 instructions in the tight loops causes more contention
// on CPU ports.
//
-//
// Optimizations being trade offs, using min/max functions independently appears
// to yield better throughput when the data resides in CPU caches:
//
2 changes: 0 additions & 2 deletions reader.go
@@ -28,7 +28,6 @@ import (
// ...
// }
//
-//
// For programs building with Go 1.18 or later, the GenericReader[T] type
// supersedes this one.
type Reader struct {
@@ -61,7 +60,6 @@ type Reader struct {
// reader := parquet.NewReader(input, config)
// ...
// }
-//
func NewReader(input io.ReaderAt, options ...ReaderOption) *Reader {
c, err := NewReaderConfig(options...)
if err != nil {
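
A hedged read-loop sketch using the Reader documented above. The Read(&row) signature and io.EOF termination are assumptions based on the pre-generics API, and RowType is a hypothetical model.

    package main

    import (
        "fmt"
        "io"
        "os"

        "github.com/segmentio/parquet-go"
    )

    type RowType struct { // hypothetical row model
        FirstName string `parquet:"first_name"`
        LastName  string `parquet:"last_name"`
    }

    func main() {
        f, err := os.Open("file.parquet") // hypothetical input
        if err != nil {
            panic(err)
        }
        defer f.Close()

        // NewReader takes an io.ReaderAt plus options, as shown above.
        reader := parquet.NewReader(f)

        for {
            row := RowType{}
            if err := reader.Read(&row); err != nil {
                if err == io.EOF {
                    break
                }
                panic(err)
            }
            fmt.Println(row)
        }
    }
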
14 changes: 7 additions & 7 deletions schema.go
@@ -59,14 +59,14 @@ type Schema struct {
// timestamp | for int64 types use the TIMESTAMP logical type with, by default, millisecond precision
// split | for float32/float64, use the BYTE_STREAM_SPLIT encoding
//
-// The date logical type is an int32 value of the number of days since the unix epoch
+// # The date logical type is an int32 value of the number of days since the unix epoch
//
// The timestamp precision can be changed by defining which precision to use as an argument.
// Supported precisions are: nanosecond, millisecond and microsecond. Example:
//
-// type Message struct {
-// TimestrampMicros int64 `parquet:"timestamp_micros,timestamp(microsecond)"
-// }
+// type Message struct {
+// TimestrampMicros int64 `parquet:"timestamp_micros,timestamp(microsecond)"
+// }
//
// The decimal tag must be followed by two integer parameters, the first integer
// representing the scale and the second the precision; for example:
@@ -90,9 +90,9 @@ type Schema struct {
//
// For example, the following will set the int64 key of the map to be a timestamp:
//
-// type Actions struct {
-// Action map[int64]string `parquet:"," parquet-key:",timestamp"`
-// }
+// type Actions struct {
+// Action map[int64]string `parquet:"," parquet-key:",timestamp"`
+// }
//
// The schema name is the Go type name of the value.
func SchemaOf(model interface{}) *Schema {
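
A runnable version of the timestamp-tag example from the doc comment above (field name and tag copied from it, with the struct tag's closing backtick restored):

    package main

    import (
        "fmt"

        "github.com/segmentio/parquet-go"
    )

    // Message mirrors the doc comment's example: an int64 column carrying the
    // TIMESTAMP logical type at microsecond precision.
    type Message struct {
        TimestrampMicros int64 `parquet:"timestamp_micros,timestamp(microsecond)"`
    }

    func main() {
        schema := parquet.SchemaOf(Message{})
        fmt.Println(schema) // the parquet schema derived from the struct tags
    }
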
1 change: 0 additions & 1 deletion search.go
@@ -56,7 +56,6 @@ func Search(index ColumnIndex, value Value, typ Type) int {
// pageIndex := parquet.Find(columnIndex, value,
// parquet.CompareNullsFirst(typ.Compare),
// )
-//
func Find(index ColumnIndex, value Value, cmp func(Value, Value) int) int {
switch {
case index.IsAscending():
1 change: 0 additions & 1 deletion sort.go
@@ -10,7 +10,6 @@ package parquet
// Descending: true,
// NullsFirst: true,
// })
-//
type SortConfig struct {
MaxRepetitionLevel int
MaxDefinitionLevel int
26 changes: 13 additions & 13 deletions value.go
@@ -447,19 +447,19 @@ func (v Value) AppendBytes(b []byte) []byte {
//
// The following formatting options are supported:
//
-// %c prints the column index
-// %+c prints the column index, prefixed with "C:"
-// %d prints the definition level
-// %+d prints the definition level, prefixed with "D:"
-// %r prints the repetition level
-// %+r prints the repetition level, prefixed with "R:"
-// %q prints the quoted representation of v
-// %+q prints the quoted representation of v, prefixed with "V:"
-// %s prints the string representation of v
-// %+s prints the string representation of v, prefixed with "V:"
-// %v same as %s
-// %+v prints a verbose representation of v
-// %#v prints a Go value representation of v
+// %c prints the column index
+// %+c prints the column index, prefixed with "C:"
+// %d prints the definition level
+// %+d prints the definition level, prefixed with "D:"
+// %r prints the repetition level
+// %+r prints the repetition level, prefixed with "R:"
+// %q prints the quoted representation of v
+// %+q prints the quoted representation of v, prefixed with "V:"
+// %s prints the string representation of v
+// %+s prints the string representation of v, prefixed with "V:"
+// %v same as %s
+// %+v prints a verbose representation of v
+// %#v prints a Go value representation of v
//
// Format satisfies the fmt.Formatter interface.
func (v Value) Format(w fmt.State, r rune) {
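
A quick illustration of the verbs listed above; parquet.ValueOf is assumed to be the constructor for Value, since none appears in this hunk.

    package main

    import (
        "fmt"

        "github.com/segmentio/parquet-go"
    )

    func main() {
        v := parquet.ValueOf("hello") // assumed constructor

        fmt.Printf("%s\n", v)  // string representation
        fmt.Printf("%+v\n", v) // verbose representation
        fmt.Printf("%#v\n", v) // Go value representation
    }
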
1 change: 0 additions & 1 deletion writer.go
@@ -68,7 +68,6 @@ type Writer struct {
// writer := parquet.NewWriter(output, config)
// ...
// }
-//
func NewWriter(output io.Writer, options ...WriterOption) *Writer {
config, err := NewWriterConfig(options...)
if err != nil {
42 changes: 21 additions & 21 deletions writer_go18.go
@@ -12,29 +12,29 @@ import (
//
// Using this type over Writer has multiple advantages:
//
-// - By leveraging type information, the Go compiler can provide greater
-// guarantees that the code is correct. For example, the parquet.Writer.Write
-// method accepts an argument of type interface{}, which delays type checking
-// until runtime. The parquet.GenericWriter[T].Write method ensures at
-// compile time that the values it receives will be of type T, reducing the
-// risk of introducing errors.
+// - By leveraging type information, the Go compiler can provide greater
+// guarantees that the code is correct. For example, the parquet.Writer.Write
+// method accepts an argument of type interface{}, which delays type checking
+// until runtime. The parquet.GenericWriter[T].Write method ensures at
+// compile time that the values it receives will be of type T, reducing the
+// risk of introducing errors.
//
-// - Since type information is known at compile time, the implementation of
-// parquet.GenericWriter[T] can make safe assumptions, removing the need for
-// runtime validation of how the parameters are passed to its methods.
-// Optimizations relying on type information are more effective, some of the
-// writer's state can be precomputed at initialization, which was not possible
-// with parquet.Writer.
+// - Since type information is known at compile time, the implementation of
+// parquet.GenericWriter[T] can make safe assumptions, removing the need for
+// runtime validation of how the parameters are passed to its methods.
+// Optimizations relying on type information are more effective, some of the
+// writer's state can be precomputed at initialization, which was not possible
+// with parquet.Writer.
//
-// - The parquet.GenericWriter[T].Write method uses a data-oriented design,
-// accepting an slice of T instead of a single value, creating more
-// opportunities to amortize the runtime cost of abstractions.
-// This optimization is not available for parquet.Writer because its Write
-// method's argument would be of type []interface{}, which would require
-// conversions back and forth from concrete types to empty interfaces (since
-// a []T cannot be interpreted as []interface{} in Go), would make the API
-// more difficult to use and waste compute resources in the type conversions,
-// defeating the purpose of the optimization in the first place.
+// - The parquet.GenericWriter[T].Write method uses a data-oriented design,
+// accepting an slice of T instead of a single value, creating more
+// opportunities to amortize the runtime cost of abstractions.
+// This optimization is not available for parquet.Writer because its Write
+// method's argument would be of type []interface{}, which would require
+// conversions back and forth from concrete types to empty interfaces (since
+// a []T cannot be interpreted as []interface{} in Go), would make the API
+// more difficult to use and waste compute resources in the type conversions,
+// defeating the purpose of the optimization in the first place.
//
// Note that this type is only available when compiling with Go 1.18 or later.
type GenericWriter[T any] struct {
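
A hedged sketch of the GenericWriter[T] usage the comparison above argues for; NewGenericWriter and the []T Write signature are assumptions inferred from the surrounding documentation.

    package main

    import (
        "bytes"

        "github.com/segmentio/parquet-go"
    )

    type RowType struct { // hypothetical row model
        FirstName string `parquet:"first_name"`
        LastName  string `parquet:"last_name"`
    }

    func main() {
        buf := new(bytes.Buffer)

        // Assumed constructor: like NewWriter, but with the row type fixed at
        // compile time through the type parameter.
        writer := parquet.NewGenericWriter[RowType](buf)

        // Write takes a slice of T, the data-oriented design described in the
        // third bullet above.
        if _, err := writer.Write([]RowType{
            {FirstName: "Ada", LastName: "Lovelace"},
            {FirstName: "Alan", LastName: "Turing"},
        }); err != nil {
            panic(err)
        }
        if err := writer.Close(); err != nil {
            panic(err)
        }
    }
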
