apacheGH-38395: [Go] fix rounding errors in decimal256 string functio…

…ns (apache#38426) ### Rationale for this change Slight modifications to the `ToString` and `FromString` methods of decimal 256 to ensure proper accurate and proper conversion given precision and scale. ### Are these changes tested? Yes * Closes: apache#38395 Authored-by: Matt Topol <[email protected]> Signed-off-by: Matt Topol <[email protected]>
loicalleyne · Nov 13, 2023 · db15b29 · db15b29
1 parent 109618d
commit db15b29
Show file tree

Hide file tree

Showing 4 changed files with 124 additions and 30 deletions.
diff --git a/go/arrow/decimal128/decimal128.go b/go/arrow/decimal128/decimal128.go
@@ -266,23 +266,35 @@ func FromString(v string, prec, scale int32) (n Num, err error) {
 		return
 	}
 
-	// Since we're going to truncate this to get an integer, we need to round
-	// the value instead because of edge cases so that we match how other implementations
-	// (e.g. C++) handles Decimal values. So if we're negative we'll subtract 0.5 and if
-	// we're positive we'll add 0.5.
-	out.Mul(out, big.NewFloat(math.Pow10(int(scale)))).SetPrec(precInBits)
-	if out.Signbit() {
-		out.Sub(out, pt5)
+	if scale < 0 {
+		var tmp big.Int
+		val, _ := out.Int(&tmp)
+		if val.BitLen() > 127 {
+			return Num{}, errors.New("bitlen too large for decimal128")
+		}
+		n = FromBigInt(val)
+		n, _ = n.Div(scaleMultipliers[-scale])
 	} else {
-		out.Add(out, pt5)
-	}
+		// Since we're going to truncate this to get an integer, we need to round
+		// the value instead because of edge cases so that we match how other implementations
+		// (e.g. C++) handles Decimal values. So if we're negative we'll subtract 0.5 and if
+		// we're positive we'll add 0.5.
+		p := (&big.Float{}).SetInt(scaleMultipliers[scale].BigInt())
+		out.Mul(out, p).SetPrec(precInBits)
+		if out.Signbit() {
+			out.Sub(out, pt5)
+		} else {
+			out.Add(out, pt5)
+		}
 
-	var tmp big.Int
-	val, _ := out.Int(&tmp)
-	if val.BitLen() > 127 {
-		return Num{}, errors.New("bitlen too large for decimal128")
+		var tmp big.Int
+		val, _ := out.Int(&tmp)
+		if val.BitLen() > 127 {
+			return Num{}, errors.New("bitlen too large for decimal128")
+		}
+		n = FromBigInt(val)
 	}
-	n = FromBigInt(val)
+
 	if !n.FitsInPrecision(prec) {
 		err = fmt.Errorf("val %v doesn't fit in precision %d", n, prec)
 	}
@@ -505,7 +517,11 @@ func (n Num) FitsInPrecision(prec int32) bool {
 
 func (n Num) ToString(scale int32) string {
 	f := (&big.Float{}).SetInt(n.BigInt())
-	f.Quo(f, (&big.Float{}).SetInt(scaleMultipliers[scale].BigInt()))
+	if scale < 0 {
+		f.SetPrec(128).Mul(f, (&big.Float{}).SetInt(scaleMultipliers[-scale].BigInt()))
+	} else {
+		f.SetPrec(128).Quo(f, (&big.Float{}).SetInt(scaleMultipliers[scale].BigInt()))
+	}
 	return f.Text('f', int(scale))
 }
 

diff --git a/go/arrow/decimal128/decimal128_test.go b/go/arrow/decimal128/decimal128_test.go
@@ -659,6 +659,7 @@ func TestFromString(t *testing.T) {
 		{"1e-37", 1, 37},
 		{"2112.33", 211233, 2},
 		{"-2112.33", -211233, 2},
+		{"12E2", 12, -2},
 	}
 
 	for _, tt := range tests {
@@ -681,3 +682,19 @@ func TestInvalidNonNegScaleFromString(t *testing.T) {
 		})
 	}
 }
+
+func TestBitLen(t *testing.T) {
+	n := decimal128.GetScaleMultiplier(38)
+	b := n.BigInt()
+	b.Mul(b, big.NewInt(25))
+	assert.Greater(t, b.BitLen(), 128)
+
+	assert.Panics(t, func() {
+		decimal128.FromBigInt(b)
+	})
+
+	_, err := decimal128.FromString(b.String(), decimal128.MaxPrecision, 0)
+	assert.ErrorContains(t, err, "bitlen too large for decimal128")
+	_, err = decimal128.FromString(b.String(), decimal128.MaxPrecision, -1)
+	assert.ErrorContains(t, err, "bitlen too large for decimal128")
+}
diff --git a/go/arrow/decimal256/decimal256.go b/go/arrow/decimal256/decimal256.go
@@ -154,23 +154,34 @@ func FromString(v string, prec, scale int32) (n Num, err error) {
 		return
 	}
 
-	out.Mul(out, big.NewFloat(math.Pow10(int(scale)))).SetPrec(precInBits)
-	// Since we're going to truncate this to get an integer, we need to round
-	// the value instead because of edge cases so that we match how other implementations
-	// (e.g. C++) handles Decimal values. So if we're negative we'll subtract 0.5 and if
-	// we're positive we'll add 0.5.
-	if out.Signbit() {
-		out.Sub(out, pt5)
+	if scale < 0 {
+		var tmp big.Int
+		val, _ := out.Int(&tmp)
+		if val.BitLen() > 255 {
+			return Num{}, errors.New("bitlen too large for decimal256")
+		}
+		n = FromBigInt(val)
+
+		n, _ = n.Div(scaleMultipliers[-scale])
 	} else {
-		out.Add(out, pt5)
-	}
+		out.Mul(out, (&big.Float{}).SetInt(scaleMultipliers[scale].BigInt())).SetPrec(precInBits)
+		// Since we're going to truncate this to get an integer, we need to round
+		// the value instead because of edge cases so that we match how other implementations
+		// (e.g. C++) handles Decimal values. So if we're negative we'll subtract 0.5 and if
+		// we're positive we'll add 0.5.
+		if out.Signbit() {
+			out.Sub(out, pt5)
+		} else {
+			out.Add(out, pt5)
+		}
 
-	var tmp big.Int
-	val, _ := out.Int(&tmp)
-	if val.BitLen() > 255 {
-		return Num{}, errors.New("bitlen too large for decimal256")
+		var tmp big.Int
+		val, _ := out.Int(&tmp)
+		if val.BitLen() > 255 {
+			return Num{}, errors.New("bitlen too large for decimal256")
+		}
+		n = FromBigInt(val)
 	}
-	n = FromBigInt(val)
 	if !n.FitsInPrecision(prec) {
 		err = fmt.Errorf("value %v doesn't fit in precision %d", n, prec)
 	}
@@ -506,7 +517,11 @@ func (n Num) FitsInPrecision(prec int32) bool {
 
 func (n Num) ToString(scale int32) string {
 	f := (&big.Float{}).SetInt(n.BigInt())
-	f.Quo(f, (&big.Float{}).SetInt(scaleMultipliers[scale].BigInt()))
+	if scale < 0 {
+		f.SetPrec(256).Mul(f, (&big.Float{}).SetInt(scaleMultipliers[-scale].BigInt()))
+	} else {
+		f.SetPrec(256).Quo(f, (&big.Float{}).SetInt(scaleMultipliers[scale].BigInt()))
+	}
 	return f.Text('f', int(scale))
 }
 

diff --git a/go/arrow/decimal256/decimal256_test.go b/go/arrow/decimal256/decimal256_test.go
@@ -20,6 +20,7 @@ import (
 	"fmt"
 	"math"
 	"math/big"
+	"strings"
 	"testing"
 
 	"github.com/apache/arrow/go/v14/arrow/decimal256"
@@ -563,6 +564,7 @@ func TestFromString(t *testing.T) {
 		{"1e-37", 1, 37},
 		{"2112.33", 211233, 2},
 		{"-2112.33", -211233, 2},
+		{"12E2", 12, -2},
 	}
 
 	for _, tt := range tests {
@@ -575,3 +577,47 @@ func TestFromString(t *testing.T) {
 		})
 	}
 }
+
+// Test issues from GH-38395
+func TestToString(t *testing.T) {
+	const decStr = "3379334159166193114608287418738414931564221155305735605033949613740461239999"
+
+	integer, _ := (&big.Int{}).SetString(decStr, 10)
+	dec := decimal256.FromBigInt(integer)
+
+	expected := "0." + decStr
+	assert.Equal(t, expected, dec.ToString(int32(len(decStr))))
+	assert.Equal(t, decStr+"0000", dec.ToString(-4))
+}
+
+// Test issues from GH-38395
+func TestHexFromString(t *testing.T) {
+	const decStr = "11111111111111111111111111111111111111.00000000000000000000000000000000000000"
+
+	num, err := decimal256.FromString(decStr, 76, 38)
+	if err != nil {
+		t.Error(err)
+	} else if decStr != num.ToString(38) {
+		t.Errorf("expected: %s, actual: %s\n", decStr, num.ToString(38))
+
+		actualCoeff := num.BigInt()
+		expectedCoeff, _ := (&big.Int{}).SetString(strings.Replace(decStr, ".", "", -1), 10)
+		t.Errorf("expected(hex): %X, actual(hex): %X\n", expectedCoeff.Bytes(), actualCoeff.Bytes())
+	}
+}
+
+func TestBitLen(t *testing.T) {
+	n := decimal256.GetScaleMultiplier(76)
+	b := n.BigInt()
+	b.Mul(b, big.NewInt(25))
+	assert.Greater(t, b.BitLen(), 255)
+
+	assert.Panics(t, func() {
+		decimal256.FromBigInt(b)
+	})
+
+	_, err := decimal256.FromString(b.String(), decimal256.MaxPrecision, 0)
+	assert.ErrorContains(t, err, "bitlen too large for decimal256")
+	_, err = decimal256.FromString(b.String(), decimal256.MaxPrecision, -1)
+	assert.ErrorContains(t, err, "bitlen too large for decimal256")
+}