Skip to content

Commit

Permalink
Type conversions (xitongsys#456)
Browse files Browse the repository at this point in the history
* add row transform reader/writer

* cleanup

* use sentinel error in transforms to indicate that the output buffer is too short

* add row index to filter function

* add row index to scan function

* remove row index from transforms and filters

* refactor row layout

* simplify setMakeSlice

* simplify transform API

* refactor and enhance conversion API using new row layout

* cleanup debug printf

* remove unused method

* remove debug t.Logf

* use direct function calls in parquet.Type implementations instead of relying on the global variables

* implement type conversions between physical types

* implement type conversions between logical types

* revert changes to intType.Compare to fix performance regression

* test type conversions

* refuse conversion between enum/json/bson and other types

* add conversions between date/time/string

* add timestamp conversions

* PR feedback: rename functions to be more descriptive

* move isZero to parquet.go so it exists on Go 1.17
  • Loading branch information
Achille authored Dec 13, 2022
1 parent 4f03ab9 commit efaee6e
Show file tree
Hide file tree
Showing 8 changed files with 1,692 additions and 379 deletions.
480 changes: 480 additions & 0 deletions convert.go

Large diffs are not rendered by default.

644 changes: 633 additions & 11 deletions convert_test.go

Large diffs are not rendered by default.

33 changes: 33 additions & 0 deletions deprecated/int96.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,29 @@ import (
// Int96 is an implementation of the deprecated INT96 parquet type.
//
// The value is held as three 32-bit words ordered from least to most
// significant: index 0 carries the low 32 bits and index 2 the high 32 bits,
// whose top bit acts as the sign bit.
type Int96 [3]uint32

// Int32ToInt96 converts an int32 value to an Int96.
//
// The low word receives the bits of value; the two upper words are filled
// with the sign of value so that negative inputs stay negative.
func Int32ToInt96(value int32) (i96 Int96) {
	// Arithmetic right shift replicates the sign bit: 0 for non-negative
	// inputs, all ones for negative inputs.
	sign := uint32(value >> 31)
	return Int96{uint32(value), sign, sign}
}

// Int64ToInt96 converts an int64 value to an Int96.
//
// The two lower words receive the 64 bits of value; the upper word is filled
// with the sign of value so that negative inputs stay negative.
func Int64ToInt96(value int64) (i96 Int96) {
	low := uint32(value)
	mid := uint32(value >> 32)
	// Arithmetic right shift replicates the sign bit: 0 for non-negative
	// inputs, all ones for negative inputs.
	high := uint32(value >> 63)
	return Int96{low, mid, high}
}

// IsZero reports whether every word of i is zero, i.e. whether i equals the
// zero-value of Int96.
func (i Int96) IsZero() bool {
	return (i[0] | i[1] | i[2]) == 0
}

// Negative returns true if i is a negative value.
func (i Int96) Negative() bool {
return (i[2] >> 31) != 0
Expand Down Expand Up @@ -48,6 +71,16 @@ func (i Int96) Int() *big.Int {
return z
}

// Int32 returns the low 32 bits of i reinterpreted as an int32. The upper
// words are ignored, so values that do not fit in 32 bits are truncated.
func (i Int96) Int32() int32 {
	low := i[0]
	return int32(low)
}

// Int64 returns the low 64 bits of i reinterpreted as an int64. The upper
// word is ignored, so values that do not fit in 64 bits are truncated.
func (i Int96) Int64() int64 {
	// Assemble the two low words as an unsigned quantity first, then
	// reinterpret the bit pattern as signed.
	bits := uint64(i[1])<<32 | uint64(i[0])
	return int64(bits)
}

// String returns a string representation of i.
func (i Int96) String() string {
return i.Int().String()
Expand Down
5 changes: 5 additions & 0 deletions errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ var (
// ErrTooManyRowGroups is returned when attempting to generate a parquet
// file with more than MaxRowGroups row groups.
ErrTooManyRowGroups = errors.New("the limit of 32767 row groups has been reached")

// ErrInvalidConversion is used to indicate that a conversion between two
// values cannot be done because there are no rules to translate between
// their physical types.
ErrInvalidConversion = errors.New("invalid conversion between parquet values")
)

type errno int
Expand Down
9 changes: 0 additions & 9 deletions null.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,3 @@ func nullIndexFuncOfByteArray(n int) nullIndexFunc {
}
}
}

// isZero reports whether every byte of b is zero. An empty or nil slice is
// considered zero.
func isZero(b []byte) bool {
	// Advance past the leading run of zero bytes; the slice is all-zero
	// exactly when that run covers the whole slice.
	n := 0
	for n < len(b) && b[n] == 0 {
		n++
	}
	return n == len(b)
}
9 changes: 9 additions & 0 deletions parquet.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,12 @@ func typeNameOf(t reflect.Type) string {
}
return s1 + " (" + s2 + ")"
}

// isZero reports whether every byte of b is zero. An empty or nil slice is
// considered zero.
func isZero(b []byte) bool {
	// Consume the slice from the front, stopping at the first non-zero byte.
	for len(b) > 0 {
		if b[0] != 0 {
			return false
		}
		b = b[1:]
	}
	return true
}
Loading

0 comments on commit efaee6e

Please sign in to comment.