Skip to content

Commit

Permalink
feat: optimize fastQuo by Knuth Algorithm D (#16)
Browse files Browse the repository at this point in the history
  • Loading branch information
quagmt authored Oct 25, 2024
1 parent cf139d3 commit 623f452
Show file tree
Hide file tree
Showing 14 changed files with 353 additions and 472 deletions.
3 changes: 3 additions & 0 deletions .codecov.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
coverage:
status:
project: false
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
.PHONY: test lint fuzz fuzz-all

# Print the compiler's optimization diagnostics, hiding the "can inline" notes.
# The -m diagnostics are written to stderr, so stderr is merged into stdout
# before the output is piped to grep; otherwise nothing would be filtered.
inline:
	go build -gcflags='-m' ./... 2>&1 | grep -v 'can inline'

lint:
@golangci-lint --config=.golangci.yaml run ./... -v
Expand Down
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,11 @@ Therefore, in most cases you can expect high performance and no memory allocatio

## Credits

This library is inspired by [govalues/decimal](https://github.com/govalues/decimal) and [lukechampine/uint128](https://github.com/lukechampine/uint128)
This library is inspired by these repositories:

- [govalues/decimal](https://github.com/govalues/decimal)
- [lukechampine/uint128](https://github.com/lukechampine/uint128)
- [ridiculousfish/libdivide](https://github.com/ridiculousfish/libdivide)

## License

Expand Down
5 changes: 3 additions & 2 deletions benchmarks/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

bench:
@go test -bench BenchmarkMarshalJSON -benchmem -memprofile mem.out -cpuprofile cpu.out -run NONE
# @go test -bench BenchmarkMarshalJSON -benchmem -memprofile mem.out -cpuprofile cpu.out -run NONE
@go test -bench BenchmarkDiv -benchmem -count=10 -run NONE > new.txt

bench-udec:
@rm -f bench-udec.txt
Expand All @@ -12,5 +13,5 @@ bench-ss:
@go test -bench=./ss -benchmem -count=10 -timeout=1h> bench-ss.txt
@sed -i 's/ss\///g' bench-ss.txt

stats: bench-ss bench-udec
stats:
@benchstat shopspring=bench-ss.txt udecimal=bench-udec.txt > benchstat.txt
170 changes: 90 additions & 80 deletions benchmarks/bench-ss.txt

Large diffs are not rendered by default.

172 changes: 91 additions & 81 deletions benchmarks/bench-udec.txt

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion benchmarks/benchmarks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ func BenchmarkDiv(b *testing.B) {
{"123456.123456", "999999"},
{"123456.123456", "456781244.1324897546"},
{"548751.15465466546", "1542.456487"},
{"22773757910726981402256170801141121114", "811656739243220271.159"},
}

for _, tc := range testcases {
Expand Down Expand Up @@ -281,7 +282,7 @@ func BenchmarkDiv(b *testing.B) {
}
}

func BenchmarkDivFallback(b *testing.B) {
func BenchmarkFallbackDiv(b *testing.B) {
testcases := []struct {
a, b string
}{
Expand Down
25 changes: 14 additions & 11 deletions benchmarks/benchstat.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,15 @@ Mul/3.Mul(7)-32
Mul/123456.123456.Mul(999999)-32 102.350n ± 9% 5.564n ± 4% -94.56% (p=0.000 n=10)
Mul/123456.123456.Mul(456781244.1324897546)-32 100.300n ± 8% 5.548n ± 3% -94.47% (p=0.000 n=10)
Mul/548751.15465466546.Mul(1542.456487)-32 103.200n ± 6% 5.467n ± 4% -94.70% (p=0.000 n=10)
Div/1234567890123456789.1234567890123456879.Div(1111.1789)-32 531.550n ± 9% 7.947n ± 5% -98.50% (p=0.000 n=10)
Div/12345.1234567890123456879.Div(1111.1234567890123456789)-32 599.30n ± 35% 24.46n ± 4% -95.92% (p=0.000 n=10)
Div/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 489.35n ± 22% 17.44n ± 4% -96.44% (p=0.000 n=10)
Div/123.456.Div(0.123)-32 445.500n ± 8% 7.383n ± 5% -98.34% (p=0.000 n=10)
Div/3.Div(7)-32 523.050n ± 11% 7.394n ± 6% -98.59% (p=0.000 n=10)
Div/123456.123456.Div(999999)-32 444.800n ± 7% 7.200n ± 3% -98.38% (p=0.000 n=10)
Div/123456.123456.Div(456781244.1324897546)-32 557.050n ± 9% 7.387n ± 5% -98.67% (p=0.000 n=10)
Div/548751.15465466546.Div(1542.456487)-32 586.900n ± 12% 7.721n ± 4% -98.68% (p=0.000 n=10)
Div/1234567890123456789.1234567890123456879.Div(1111.1789)-32 443.000n ± 8% 8.087n ± 5% -98.17% (p=0.000 n=10)
Div/12345.1234567890123456879.Div(1111.1234567890123456789)-32 464.75n ± 14% 13.04n ± 6% -97.20% (p=0.000 n=10)
Div/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 483.30n ± 6% 11.78n ± 3% -97.56% (p=0.000 n=10)
Div/123.456.Div(0.123)-32 374.450n ± 6% 8.040n ± 6% -97.85% (p=0.000 n=10)
Div/3.Div(7)-32 380.250n ± 30% 7.813n ± 6% -97.95% (p=0.000 n=10)
Div/123456.123456.Div(999999)-32 373.250n ± 7% 7.833n ± 5% -97.90% (p=0.000 n=10)
Div/123456.123456.Div(456781244.1324897546)-32 452.800n ± 5% 7.783n ± 5% -98.28% (p=0.000 n=10)
Div/548751.15465466546.Div(1542.456487)-32 473.150n ± 6% 8.478n ± 6% -98.21% (p=0.000 n=10)
Div/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 509.35n ± 39% 15.65n ± 5% -96.93% (p=0.000 n=10)
DivFallback/12345679012345679890123456789.1234567890123456789.Div(999999)-32 762.1n ± 7% 311.6n ± 17% -59.12% (p=0.000 n=10)
DivFallback/1234.Div(12345679012345679890123456789.1234567890123456789)-32 242.1n ± 87% 326.6n ± 11% ~ (p=0.481 n=10)
Pow/1.01.Pow(10)-32 724.70n ± 31% 38.25n ± 4% -94.72% (p=0.000 n=10)
Expand Down Expand Up @@ -74,7 +75,7 @@ UnmarshalBinary/123456.123456-32
UnmarshalBinary/1234567890-32 65.140n ± 38% 1.790n ± 5% -97.25% (p=0.000 n=10)
UnmarshalBinary/0.1234567890123456879-32 45.120n ± 18% 1.854n ± 5% -95.89% (p=0.000 n=10)
UnmarshalBinary/12345678901234567891234567890123456789.1234567890123456879-32 72.44n ± 11% 75.91n ± 24% ~ (p=0.684 n=10)
geomean 183.7n 18.93n -89.69%
geomean 182.4n 18.72n -89.74%

│ shopspring │ udecimal │
│ B/op │ B/op vs base │
Expand Down Expand Up @@ -120,6 +121,7 @@ Div/3.Div(7)-32
Div/123456.123456.Div(999999)-32 288.0 ± 0% 0.0 ± 0% -100.00% (p=0.000 n=10)
Div/123456.123456.Div(456781244.1324897546)-32 368.0 ± 0% 0.0 ± 0% -100.00% (p=0.000 n=10)
Div/548751.15465466546.Div(1542.456487)-32 368.0 ± 0% 0.0 ± 0% -100.00% (p=0.000 n=10)
Div/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 336.0 ± 0% 0.0 ± 0% -100.00% (p=0.000 n=10)
DivFallback/12345679012345679890123456789.1234567890123456789.Div(999999)-32 496.0 ± 0% 264.0 ± 0% -46.77% (p=0.000 n=10)
DivFallback/1234.Div(12345679012345679890123456789.1234567890123456789)-32 272.0 ± 0% 320.0 ± 0% +17.65% (p=0.000 n=10)
Pow/1.01.Pow(10)-32 576.0 ± 0% 0.0 ± 0% -100.00% (p=0.000 n=10)
Expand Down Expand Up @@ -148,7 +150,7 @@ UnmarshalBinary/123456.123456-32
UnmarshalBinary/1234567890-32 40.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10)
UnmarshalBinary/0.1234567890123456879-32 40.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10)
UnmarshalBinary/12345678901234567891234567890123456789.1234567890123456879-32 96.00 ± 0% 96.00 ± 0% ~ (p=1.000 n=10) ¹
geomean 120.6 ? ² ³
geomean 122.4 ? ² ³
¹ all samples are equal
² summaries must be >0 to compute geomean
³ ratios must be >0 to compute geomean
Expand Down Expand Up @@ -197,6 +199,7 @@ Div/3.Div(7)-32
Div/123456.123456.Div(999999)-32 10.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10)
Div/123456.123456.Div(456781244.1324897546)-32 12.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10)
Div/548751.15465466546.Div(1542.456487)-32 12.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10)
Div/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 9.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=10)
DivFallback/12345679012345679890123456789.1234567890123456789.Div(999999)-32 16.000 ± 0% 7.000 ± 0% -56.25% (p=0.000 n=10)
DivFallback/1234.Div(12345679012345679890123456789.1234567890123456789)-32 8.000 ± 0% 7.000 ± 0% -12.50% (p=0.000 n=10)
Pow/1.01.Pow(10)-32 18.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10)
Expand Down Expand Up @@ -225,7 +228,7 @@ UnmarshalBinary/123456.123456-32
UnmarshalBinary/1234567890-32 2.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=10)
UnmarshalBinary/0.1234567890123456879-32 2.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=10)
UnmarshalBinary/12345678901234567891234567890123456789.1234567890123456879-32 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) ¹
geomean 4.268 ? ² ³
geomean 4.313 ? ² ³
¹ all samples are equal
² summaries must be >0 to compute geomean
³ ratios must be >0 to compute geomean
4 changes: 3 additions & 1 deletion decimal.go
Original file line number Diff line number Diff line change
Expand Up @@ -565,10 +565,12 @@ func (d Decimal) Div64(v uint64) (Decimal, error) {

if !d.coef.overflow() {
d256 := d.coef.u128.MulToU256(pow10[defaultPrec-d.prec])
quo, _, err := d256.quoRem64Tou128(v)
quo, _, err := d256.div192by64(v)
if err == nil {
return newDecimal(d.neg, bintFromU128(quo), defaultPrec), nil
}

// overflow, try with *big.Int
}

// overflow, try with *big.Int
Expand Down
15 changes: 13 additions & 2 deletions decimal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -859,9 +859,13 @@ func TestDiv(t *testing.T) {
overflow bool
wantErr error
}{
{"22773757910726981402256170801141121114", "811656739243220271.159", false, nil},
{"22773757910726981402256170801141121024", "2277375793122336353220649475.264577813", false, nil},
{"2345678901234567899", "1234567890123456789.1234567890123456789", false, nil},
{"123456.123", "8796093022208", false, nil},
{"1844674407370955161.5999999999", "18446744073709551616", false, nil},
{"1000000000000", "0.0000001", false, nil},
{"479615345916448342049", "1494.186269970473681015", true, nil},
{"479615345916448342049", "1494.186269970473681015", false, nil},
{"123456.1234567890123456789", "234567.1234567890123456789", false, nil},
{"123456.1234567890123456789", "1", false, nil},
{"-123456.1234567890123456789", "234567.1234567890123456789", false, nil},
Expand All @@ -874,7 +878,6 @@ func TestDiv(t *testing.T) {
{"123456789012345678.9", "0.1", false, nil},
{"1111111111111", "1111.123456789123456789", false, nil},
{"123456789", "1.1234567890123456789", false, nil},
{"2345678901234567899", "1234567890123456789.1234567890123456789", false, nil},
{"0.1234567890123456789", "0.04586201546101", false, nil},
{"1", "1111.123456789123456789", false, nil},
{"1", "1.123456789123456789", false, nil},
Expand Down Expand Up @@ -2318,6 +2321,14 @@ func TestPrecUint(t *testing.T) {
require.Equal(t, oneUnit.prec, b.PrecUint())
}
})
}
}

// BenchmarkDiv measures Div on a fixed pair of operands: a 38-digit integer
// dividend and a divisor with three fractional digits.
func BenchmarkDiv(b *testing.B) {
	a := MustParse("22773757910726981402256170801141121114")
	bb := MustParse("811656739243220271.159")

	// Report allocations and keep the operand parsing above out of the
	// timed region.
	b.ReportAllocs()
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		// Fail loudly instead of silently benchmarking an error path.
		if _, err := a.Div(bb); err != nil {
			b.Fatal(err)
		}
	}
}
9 changes: 9 additions & 0 deletions testdata/fuzz/FuzzDivDec/d1ac6afed7c33047
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
go test fuzz v1
bool(true)
uint64(1234567890123456789)
uint64(90)
byte('\x00')
bool(true)
uint64(44)
uint64(55)
byte('b')
49 changes: 25 additions & 24 deletions u128.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

var (
// one128 is the u128 whose low word is 1 and whose high word is left at zero.
one128 = u128{lo: 1}
// max128 has every bit set in both the high and the low 64-bit word.
max128 = u128{hi: ^uint64(0), lo: ^uint64(0)}
)

// u128 (big unsigned-integer) is a 128-bit unsigned integer
Expand Down Expand Up @@ -72,14 +73,6 @@ func (u u128) Cmp64(v uint64) int {
}
}

// LessThan reports whether u is strictly smaller than v.
// The high words decide the result unless they are equal, in which case
// the low words are compared.
func (u u128) LessThan(v u128) bool {
	if u.hi != v.hi {
		return u.hi < v.hi
	}

	return u.lo < v.lo
}

func (u u128) Add(v u128) (u128, error) {
lo, carry := bits.Add64(u.lo, v.lo, 0)
hi, carry := bits.Add64(u.hi, v.hi, carry)
Expand Down Expand Up @@ -283,36 +276,44 @@ func (u u128) ToBigInt() *big.Int {
// getTrailingZeros64 returns the number of trailing decimal zeros in u,
// i.e. how many times u can be divided evenly by 10.
// For u == 0 every divisibility test succeeds and the result is 19.
// NOTE: this only works when maxPrec is 19
func getTrailingZeros64(u uint64) uint8 {
	var zeros uint8

	// max scale is 19, so we can start from 16
	if u%1e16 == 0 {
		zeros += 16
		u /= 1e16

		// short path, because we know that max scale is 19:
		// at most 3 more zeros can remain, so test 2 and then 1.
		if u%100 == 0 {
			zeros += 2
			u /= 100
		}

		if u%10 == 0 {
			zeros++
		}

		return zeros
	}

	// Fewer than 16 zeros remain: test 8, 4, 2 and 1 in turn, stripping
	// the matched zeros from u before moving to the next smaller step.
	if u%1e8 == 0 {
		zeros += 8
		u /= 1e8
	}

	if u%1e4 == 0 {
		zeros += 4
		u /= 1e4
	}

	if u%100 == 0 {
		zeros += 2
		u /= 100
	}

	if u%10 == 0 {
		zeros++
	}

	return zeros
}
Loading

0 comments on commit 623f452

Please sign in to comment.