From 623f452f09bac895e7aefb88874ac5a4ca52ba6e Mon Sep 17 00:00:00 2001 From: Quang Date: Fri, 25 Oct 2024 07:56:59 +0700 Subject: [PATCH] feat: optimize fastQuo by Knuth Algorithm D (#16) --- .codecov.yaml | 3 + Makefile | 3 + README.md | 6 +- benchmarks/Makefile | 5 +- benchmarks/bench-ss.txt | 170 +++++++------ benchmarks/bench-udec.txt | 172 +++++++------ benchmarks/benchmarks_test.go | 3 +- benchmarks/benchstat.txt | 25 +- decimal.go | 4 +- decimal_test.go | 15 +- testdata/fuzz/FuzzDivDec/d1ac6afed7c33047 | 9 + u128.go | 49 ++-- u256.go | 291 +++++++--------------- u256_test.go | 70 ------ 14 files changed, 353 insertions(+), 472 deletions(-) create mode 100644 .codecov.yaml create mode 100644 testdata/fuzz/FuzzDivDec/d1ac6afed7c33047 diff --git a/.codecov.yaml b/.codecov.yaml new file mode 100644 index 0000000..db83645 --- /dev/null +++ b/.codecov.yaml @@ -0,0 +1,3 @@ +coverage: + status: + project: false diff --git a/Makefile b/Makefile index 76c6bd6..2d75be1 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,7 @@ .PHONY: test lint fuzz fuzz-all + +inline: + go build -gcflags='-m ' ./... | grep -v 'can inline' lint: @golangci-lint --config=.golangci.yaml run ./... -v diff --git a/README.md b/README.md index d778f5a..9719cd4 100644 --- a/README.md +++ b/README.md @@ -157,7 +157,11 @@ Therefore, in most cases you can expect high performance and no memory allocatio ## Credits -This library is inspired by [govalues/decimal](https://github.com/govalues/decimal) and [lukechampine/uint128](https://github.com/lukechampine/uint128) +This library is inspired by these repositories: + +- [govalues/decimal](https://github.com/govalues/decimal) +- [lukechampine/uint128](https://github.com/lukechampine/uint128) +- [ridiculousfish/libdivide](https://github.com/ridiculousfish/libdivide) ## License diff --git a/benchmarks/Makefile b/benchmarks/Makefile index b74f907..481c6af 100644 --- a/benchmarks/Makefile +++ b/benchmarks/Makefile @@ -1,6 +1,7 @@ bench: - @go test -bench BenchmarkMarshalJSON -benchmem -memprofile mem.out -cpuprofile cpu.out -run NONE + # @go test -bench BenchmarkMarshalJSON -benchmem -memprofile mem.out -cpuprofile cpu.out -run NONE + @go test -bench BenchmarkDiv -benchmem -count=10 -run NONE > new.txt bench-udec: @rm -f bench-udec.txt @@ -12,5 +13,5 @@ bench-ss: @go test -bench=./ss -benchmem -count=10 -timeout=1h> bench-ss.txt @sed -i 's/ss\///g' bench-ss.txt -stats: bench-ss bench-udec +stats: @benchstat shopspring=bench-ss.txt udecimal=bench-udec.txt > benchstat.txt diff --git a/benchmarks/bench-ss.txt b/benchmarks/bench-ss.txt index 94e8b13..39156a0 100644 --- a/benchmarks/bench-ss.txt +++ b/benchmarks/bench-ss.txt @@ -342,86 +342,96 @@ BenchmarkMul/548751.15465466546.Mul(1542.456487)-32 BenchmarkMul/548751.15465466546.Mul(1542.456487)-32 14766976 103.6 ns/op 80 B/op 2 allocs/op BenchmarkMul/548751.15465466546.Mul(1542.456487)-32 12577491 106.2 ns/op 80 B/op 2 allocs/op BenchmarkMul/548751.15465466546.Mul(1542.456487)-32 10616454 102.8 ns/op 80 B/op 2 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2001427 558.9 ns/op 352 B/op 11 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 1999024 535.4 ns/op 352 B/op 11 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2252511 522.0 ns/op 352 B/op 11 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2361565 546.4 ns/op 352 B/op 11 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2019435 527.7 ns/op 352 B/op 11 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2363082 484.3 ns/op 352 B/op 11 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2259567 536.6 ns/op 352 B/op 11 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2878698 565.0 ns/op 352 B/op 11 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2226279 505.6 ns/op 352 B/op 11 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2243221 479.1 ns/op 352 B/op 11 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 1803873 650.3 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 1901464 653.9 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 1799982 581.2 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 1961334 666.6 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 2204139 617.4 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 1785332 648.8 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 3167824 387.6 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 2972823 376.9 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 3897938 547.4 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 2583777 445.5 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2783401 446.3 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2728230 402.9 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2907870 596.3 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2333786 449.0 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 3167222 585.9 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 3221036 475.6 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 3124442 382.3 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2228572 516.6 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2412388 609.3 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2420545 503.1 ns/op 336 B/op 9 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 3072528 410.4 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 2895246 449.6 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 2685127 437.6 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 3225652 443.3 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 2590617 419.7 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 3212510 492.7 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 2724086 464.5 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 2982030 404.2 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 2586787 447.7 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 2402697 450.6 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/3.Div(7)-32 2614386 533.5 ns/op 328 B/op 12 allocs/op -BenchmarkDiv/3.Div(7)-32 2541856 521.1 ns/op 328 B/op 12 allocs/op -BenchmarkDiv/3.Div(7)-32 2237886 511.6 ns/op 328 B/op 12 allocs/op -BenchmarkDiv/3.Div(7)-32 2586518 554.6 ns/op 328 B/op 12 allocs/op -BenchmarkDiv/3.Div(7)-32 2312083 559.3 ns/op 328 B/op 12 allocs/op -BenchmarkDiv/3.Div(7)-32 2242868 525.0 ns/op 328 B/op 12 allocs/op -BenchmarkDiv/3.Div(7)-32 2439654 454.8 ns/op 328 B/op 12 allocs/op -BenchmarkDiv/3.Div(7)-32 2206954 532.9 ns/op 328 B/op 12 allocs/op -BenchmarkDiv/3.Div(7)-32 2241189 469.7 ns/op 328 B/op 12 allocs/op -BenchmarkDiv/3.Div(7)-32 2363046 466.4 ns/op 328 B/op 12 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 2764886 400.3 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 2729370 446.8 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 2604680 481.9 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 2586762 445.8 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 2889513 474.3 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 3017598 438.5 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 3401457 439.5 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 3038935 428.8 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 2563717 443.8 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 3520320 476.3 ns/op 288 B/op 10 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2140495 577.0 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2493212 613.6 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2053581 576.4 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2249961 578.1 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2454177 450.6 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2231642 506.6 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2607952 550.3 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2230626 563.8 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2729355 534.7 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 1916490 527.7 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 1991365 580.5 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2060182 566.5 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 1999614 578.3 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2286982 499.4 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2131124 602.1 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2268651 654.8 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 1953102 532.7 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2025718 593.3 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2089290 594.6 ns/op 368 B/op 12 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2093712 654.9 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2779226 450.0 ns/op 352 B/op 11 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2727945 456.2 ns/op 352 B/op 11 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 3039740 407.6 ns/op 352 B/op 11 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2645822 436.0 ns/op 352 B/op 11 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2841316 410.2 ns/op 352 B/op 11 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2646810 418.0 ns/op 352 B/op 11 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2677731 463.9 ns/op 352 B/op 11 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 3342692 463.9 ns/op 352 B/op 11 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2913794 399.2 ns/op 352 B/op 11 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 2870110 450.8 ns/op 352 B/op 11 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 2467432 459.0 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 2595129 408.3 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 2697139 388.7 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 2848003 470.5 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 2654751 474.2 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 2629836 398.7 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 2198280 507.2 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 1943968 515.1 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 2563866 458.0 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 3658836 478.5 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2284389 510.4 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2449761 550.6 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2588371 489.7 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2221796 473.3 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2748195 455.6 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2503466 447.2 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2375684 495.7 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2517376 462.7 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2457477 476.9 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 2262340 514.7 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 4193889 373.7 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 3437018 373.8 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 3095576 375.1 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 5804815 377.9 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 2959058 385.4 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 2887820 396.6 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 3487492 350.6 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 3349612 347.1 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 4151662 361.1 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 3171105 377.3 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/3.Div(7)-32 2920420 411.0 ns/op 328 B/op 12 allocs/op +BenchmarkDiv/3.Div(7)-32 4824445 261.9 ns/op 328 B/op 12 allocs/op +BenchmarkDiv/3.Div(7)-32 4870394 384.8 ns/op 328 B/op 12 allocs/op +BenchmarkDiv/3.Div(7)-32 4682977 451.9 ns/op 328 B/op 12 allocs/op +BenchmarkDiv/3.Div(7)-32 4421299 269.1 ns/op 328 B/op 12 allocs/op +BenchmarkDiv/3.Div(7)-32 4438849 264.7 ns/op 328 B/op 12 allocs/op +BenchmarkDiv/3.Div(7)-32 4536189 388.0 ns/op 328 B/op 12 allocs/op +BenchmarkDiv/3.Div(7)-32 2801356 375.7 ns/op 328 B/op 12 allocs/op +BenchmarkDiv/3.Div(7)-32 3006039 355.7 ns/op 328 B/op 12 allocs/op +BenchmarkDiv/3.Div(7)-32 2901069 467.9 ns/op 328 B/op 12 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 3677923 356.4 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 3294194 363.7 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 2957396 368.3 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 2908095 399.1 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 3197464 351.5 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 2802676 400.5 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 3001760 378.2 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 2986101 398.0 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 2989735 335.4 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 3018793 397.5 ns/op 288 B/op 10 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2489937 474.4 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2517294 472.3 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2637439 496.5 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2912424 443.1 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2701718 430.4 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2543385 433.7 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2657427 462.7 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2539336 455.9 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2962663 427.5 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 2882956 449.7 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2550741 454.1 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2764209 489.1 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2722330 443.6 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2815402 476.5 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2319199 468.1 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2699635 469.8 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2687140 497.7 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2843178 429.0 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2606370 478.0 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 2271256 515.6 ns/op 368 B/op 12 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 2416411 557.3 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 2835487 497.7 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 2148339 521.0 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 2733025 435.9 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 3393285 313.1 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 3899338 299.7 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 4159400 358.9 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 2152507 581.6 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 2260812 556.1 ns/op 336 B/op 9 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 2191561 588.8 ns/op 336 B/op 9 allocs/op BenchmarkDivFallback/12345679012345679890123456789.1234567890123456789.Div(999999)-32 1713140 810.4 ns/op 496 B/op 16 allocs/op BenchmarkDivFallback/12345679012345679890123456789.1234567890123456789.Div(999999)-32 1493336 689.4 ns/op 496 B/op 16 allocs/op BenchmarkDivFallback/12345679012345679890123456789.1234567890123456789.Div(999999)-32 1790822 768.6 ns/op 496 B/op 16 allocs/op diff --git a/benchmarks/bench-udec.txt b/benchmarks/bench-udec.txt index f53b86b..ae2960f 100644 --- a/benchmarks/bench-udec.txt +++ b/benchmarks/bench-udec.txt @@ -342,86 +342,96 @@ BenchmarkMul/548751.15465466546.Mul(1542.456487)-32 BenchmarkMul/548751.15465466546.Mul(1542.456487)-32 219055630 5.412 ns/op 0 B/op 0 allocs/op BenchmarkMul/548751.15465466546.Mul(1542.456487)-32 224066515 5.396 ns/op 0 B/op 0 allocs/op BenchmarkMul/548751.15465466546.Mul(1542.456487)-32 223142401 5.418 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 158899826 7.890 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 159959955 7.939 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 151635830 7.542 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 151719582 7.975 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 150912637 7.539 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 159338456 7.996 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 160449841 7.724 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 159670674 7.986 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 155218692 7.956 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 159842067 7.955 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 53625254 24.85 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 52537207 23.58 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 51550003 25.04 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 48573836 23.28 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 52365888 24.74 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 51662060 24.97 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 47528337 23.43 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 46705844 24.19 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 51425244 23.99 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 48615493 24.87 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 74041957 16.77 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 73594875 17.64 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 68682906 17.42 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 69921109 16.97 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 71639146 17.52 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 73468084 16.72 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 73609564 16.79 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 69706010 17.57 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 59376592 17.45 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 72754681 17.60 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 151417376 7.863 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 153985288 7.380 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 163771196 7.365 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 163626572 7.759 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 163548856 7.570 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 164182551 7.359 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 164840156 7.340 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 155308354 7.381 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 164059641 7.664 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123.456.Div(0.123)-32 164531685 7.385 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/3.Div(7)-32 163878861 7.408 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/3.Div(7)-32 173012775 6.970 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/3.Div(7)-32 164052776 7.458 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/3.Div(7)-32 163789023 7.027 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/3.Div(7)-32 167186475 7.427 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/3.Div(7)-32 161348532 7.494 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/3.Div(7)-32 159593680 7.389 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/3.Div(7)-32 173906440 7.398 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/3.Div(7)-32 172751832 7.012 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/3.Div(7)-32 173701020 6.978 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 166079246 7.035 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 172252123 7.341 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 164495840 7.383 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 170320623 7.441 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 171529226 7.026 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 172993704 6.989 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 173547274 6.987 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 164820327 7.366 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 163448935 7.541 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(999999)-32 167308080 7.060 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 162654303 7.575 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 158638683 7.606 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 173672071 7.065 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 162716950 7.429 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 170468492 7.023 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 166012875 7.371 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 162642414 6.976 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 164865987 7.403 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 174594855 6.985 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 169113800 7.440 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 163025080 7.503 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 156008149 7.739 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 164614299 7.748 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 162713322 7.533 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 163467006 7.703 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 165232270 7.346 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 155746035 7.750 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 164033959 7.406 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 155916049 7.772 ns/op 0 B/op 0 allocs/op -BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 155291344 7.796 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 144060739 8.058 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 144099240 8.086 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 149271242 7.985 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 145068530 8.454 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 139311952 8.118 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 148210902 8.087 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 141451315 7.961 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 150262104 8.054 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 148680552 8.582 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(1111.1789)-32 141540331 8.513 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 94035123 13.04 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 92586112 12.99 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 87366928 13.79 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 94134597 13.63 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 95115486 12.95 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 95969488 13.75 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 88596912 12.87 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 92791944 12.92 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 95716098 13.03 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/12345.1234567890123456879.Div(1111.1234567890123456789)-32 91845314 13.76 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 100000000 11.58 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 103846548 11.84 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 100000000 12.18 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 100000000 11.41 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 100000000 12.19 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 98848603 12.13 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 100000000 11.64 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 100000000 11.72 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 100000000 11.70 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 101356140 11.84 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 148001583 8.044 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 149241036 8.572 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 149080956 8.492 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 140527568 8.036 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 149948042 8.023 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 148953070 8.504 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 140408126 8.012 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 141500830 8.015 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 141412342 8.025 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123.456.Div(0.123)-32 150531444 8.472 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/3.Div(7)-32 144138891 7.955 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/3.Div(7)-32 146353278 8.248 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/3.Div(7)-32 146075206 8.404 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/3.Div(7)-32 143596971 8.287 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/3.Div(7)-32 153517964 7.795 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/3.Div(7)-32 145339540 7.716 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/3.Div(7)-32 153193179 7.803 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/3.Div(7)-32 152815665 7.823 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/3.Div(7)-32 153297076 7.766 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/3.Div(7)-32 154605549 7.787 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 144638626 8.241 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 155198667 7.739 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 155213695 8.209 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 146253072 7.685 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 155267479 8.204 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 145863984 7.974 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 155252145 7.770 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 155350876 7.723 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 146246602 7.737 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(999999)-32 155581647 7.895 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 154664816 7.762 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 154684719 7.835 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 146021377 7.748 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 147999754 8.203 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 156171705 7.804 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 146935623 7.716 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 154615290 7.732 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 156030402 8.127 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 147510105 7.762 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/123456.123456.Div(456781244.1324897546)-32 145528779 8.242 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 150766317 7.988 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 140802597 8.543 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 146672997 8.519 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 140385244 8.484 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 140471734 8.585 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 139026746 7.994 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 146926623 8.471 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 148403264 8.526 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 141963686 7.974 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/548751.15465466546.Div(1542.456487)-32 142204936 7.926 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 81231584 15.02 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 82037275 16.02 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 75846043 14.92 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 79103592 15.89 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 76965264 15.78 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 78262795 15.53 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 79737885 15.79 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 79459810 15.94 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 79768341 15.21 ns/op 0 B/op 0 allocs/op +BenchmarkDiv/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 76145864 14.92 ns/op 0 B/op 0 allocs/op BenchmarkDivFallback/12345679012345679890123456789.1234567890123456789.Div(999999)-32 5048162 334.3 ns/op 264 B/op 7 allocs/op BenchmarkDivFallback/12345679012345679890123456789.1234567890123456789.Div(999999)-32 3739804 318.0 ns/op 264 B/op 7 allocs/op BenchmarkDivFallback/12345679012345679890123456789.1234567890123456789.Div(999999)-32 4189141 336.0 ns/op 264 B/op 7 allocs/op @@ -703,4 +713,4 @@ BenchmarkUnmarshalBinary/12345678901234567891234567890123456789.1234567890123456 BenchmarkUnmarshalBinary/12345678901234567891234567890123456789.1234567890123456879-32 12981919 89.92 ns/op 96 B/op 2 allocs/op BenchmarkUnmarshalBinary/12345678901234567891234567890123456789.1234567890123456879-32 13904814 88.03 ns/op 96 B/op 2 allocs/op PASS -ok github.com/quagmt/udecimal/benchmarks 1156.394s +ok github.com/quagmtimal/benchmarks 1156.394s diff --git a/benchmarks/benchmarks_test.go b/benchmarks/benchmarks_test.go index 6052a97..d416539 100644 --- a/benchmarks/benchmarks_test.go +++ b/benchmarks/benchmarks_test.go @@ -252,6 +252,7 @@ func BenchmarkDiv(b *testing.B) { {"123456.123456", "999999"}, {"123456.123456", "456781244.1324897546"}, {"548751.15465466546", "1542.456487"}, + {"22773757910726981402256170801141121114", "811656739243220271.159"}, } for _, tc := range testcases { @@ -281,7 +282,7 @@ func BenchmarkDiv(b *testing.B) { } } -func BenchmarkDivFallback(b *testing.B) { +func BenchmarkFallbackDiv(b *testing.B) { testcases := []struct { a, b string }{ diff --git a/benchmarks/benchstat.txt b/benchmarks/benchstat.txt index 29b7dac..5a8a75f 100644 --- a/benchmarks/benchstat.txt +++ b/benchmarks/benchstat.txt @@ -38,14 +38,15 @@ Mul/3.Mul(7)-32 Mul/123456.123456.Mul(999999)-32 102.350n ± 9% 5.564n ± 4% -94.56% (p=0.000 n=10) Mul/123456.123456.Mul(456781244.1324897546)-32 100.300n ± 8% 5.548n ± 3% -94.47% (p=0.000 n=10) Mul/548751.15465466546.Mul(1542.456487)-32 103.200n ± 6% 5.467n ± 4% -94.70% (p=0.000 n=10) -Div/1234567890123456789.1234567890123456879.Div(1111.1789)-32 531.550n ± 9% 7.947n ± 5% -98.50% (p=0.000 n=10) -Div/12345.1234567890123456879.Div(1111.1234567890123456789)-32 599.30n ± 35% 24.46n ± 4% -95.92% (p=0.000 n=10) -Div/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 489.35n ± 22% 17.44n ± 4% -96.44% (p=0.000 n=10) -Div/123.456.Div(0.123)-32 445.500n ± 8% 7.383n ± 5% -98.34% (p=0.000 n=10) -Div/3.Div(7)-32 523.050n ± 11% 7.394n ± 6% -98.59% (p=0.000 n=10) -Div/123456.123456.Div(999999)-32 444.800n ± 7% 7.200n ± 3% -98.38% (p=0.000 n=10) -Div/123456.123456.Div(456781244.1324897546)-32 557.050n ± 9% 7.387n ± 5% -98.67% (p=0.000 n=10) -Div/548751.15465466546.Div(1542.456487)-32 586.900n ± 12% 7.721n ± 4% -98.68% (p=0.000 n=10) +Div/1234567890123456789.1234567890123456879.Div(1111.1789)-32 443.000n ± 8% 8.087n ± 5% -98.17% (p=0.000 n=10) +Div/12345.1234567890123456879.Div(1111.1234567890123456789)-32 464.75n ± 14% 13.04n ± 6% -97.20% (p=0.000 n=10) +Div/1234567890123456789.1234567890123456879.Div(9876543210987654321.1234567890123456789)-32 483.30n ± 6% 11.78n ± 3% -97.56% (p=0.000 n=10) +Div/123.456.Div(0.123)-32 374.450n ± 6% 8.040n ± 6% -97.85% (p=0.000 n=10) +Div/3.Div(7)-32 380.250n ± 30% 7.813n ± 6% -97.95% (p=0.000 n=10) +Div/123456.123456.Div(999999)-32 373.250n ± 7% 7.833n ± 5% -97.90% (p=0.000 n=10) +Div/123456.123456.Div(456781244.1324897546)-32 452.800n ± 5% 7.783n ± 5% -98.28% (p=0.000 n=10) +Div/548751.15465466546.Div(1542.456487)-32 473.150n ± 6% 8.478n ± 6% -98.21% (p=0.000 n=10) +Div/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 509.35n ± 39% 15.65n ± 5% -96.93% (p=0.000 n=10) DivFallback/12345679012345679890123456789.1234567890123456789.Div(999999)-32 762.1n ± 7% 311.6n ± 17% -59.12% (p=0.000 n=10) DivFallback/1234.Div(12345679012345679890123456789.1234567890123456789)-32 242.1n ± 87% 326.6n ± 11% ~ (p=0.481 n=10) Pow/1.01.Pow(10)-32 724.70n ± 31% 38.25n ± 4% -94.72% (p=0.000 n=10) @@ -74,7 +75,7 @@ UnmarshalBinary/123456.123456-32 UnmarshalBinary/1234567890-32 65.140n ± 38% 1.790n ± 5% -97.25% (p=0.000 n=10) UnmarshalBinary/0.1234567890123456879-32 45.120n ± 18% 1.854n ± 5% -95.89% (p=0.000 n=10) UnmarshalBinary/12345678901234567891234567890123456789.1234567890123456879-32 72.44n ± 11% 75.91n ± 24% ~ (p=0.684 n=10) -geomean 183.7n 18.93n -89.69% +geomean 182.4n 18.72n -89.74% │ shopspring │ udecimal │ │ B/op │ B/op vs base │ @@ -120,6 +121,7 @@ Div/3.Div(7)-32 Div/123456.123456.Div(999999)-32 288.0 ± 0% 0.0 ± 0% -100.00% (p=0.000 n=10) Div/123456.123456.Div(456781244.1324897546)-32 368.0 ± 0% 0.0 ± 0% -100.00% (p=0.000 n=10) Div/548751.15465466546.Div(1542.456487)-32 368.0 ± 0% 0.0 ± 0% -100.00% (p=0.000 n=10) +Div/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 336.0 ± 0% 0.0 ± 0% -100.00% (p=0.000 n=10) DivFallback/12345679012345679890123456789.1234567890123456789.Div(999999)-32 496.0 ± 0% 264.0 ± 0% -46.77% (p=0.000 n=10) DivFallback/1234.Div(12345679012345679890123456789.1234567890123456789)-32 272.0 ± 0% 320.0 ± 0% +17.65% (p=0.000 n=10) Pow/1.01.Pow(10)-32 576.0 ± 0% 0.0 ± 0% -100.00% (p=0.000 n=10) @@ -148,7 +150,7 @@ UnmarshalBinary/123456.123456-32 UnmarshalBinary/1234567890-32 40.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10) UnmarshalBinary/0.1234567890123456879-32 40.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10) UnmarshalBinary/12345678901234567891234567890123456789.1234567890123456879-32 96.00 ± 0% 96.00 ± 0% ~ (p=1.000 n=10) ¹ -geomean 120.6 ? ² ³ +geomean 122.4 ? ² ³ ¹ all samples are equal ² summaries must be >0 to compute geomean ³ ratios must be >0 to compute geomean @@ -197,6 +199,7 @@ Div/3.Div(7)-32 Div/123456.123456.Div(999999)-32 10.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10) Div/123456.123456.Div(456781244.1324897546)-32 12.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10) Div/548751.15465466546.Div(1542.456487)-32 12.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10) +Div/22773757910726981402256170801141121114.Div(811656739243220271.159)-32 9.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=10) DivFallback/12345679012345679890123456789.1234567890123456789.Div(999999)-32 16.000 ± 0% 7.000 ± 0% -56.25% (p=0.000 n=10) DivFallback/1234.Div(12345679012345679890123456789.1234567890123456789)-32 8.000 ± 0% 7.000 ± 0% -12.50% (p=0.000 n=10) Pow/1.01.Pow(10)-32 18.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10) @@ -225,7 +228,7 @@ UnmarshalBinary/123456.123456-32 UnmarshalBinary/1234567890-32 2.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=10) UnmarshalBinary/0.1234567890123456879-32 2.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=10) UnmarshalBinary/12345678901234567891234567890123456789.1234567890123456879-32 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) ¹ -geomean 4.268 ? ² ³ +geomean 4.313 ? ² ³ ¹ all samples are equal ² summaries must be >0 to compute geomean ³ ratios must be >0 to compute geomean diff --git a/decimal.go b/decimal.go index c9140b0..b1505eb 100644 --- a/decimal.go +++ b/decimal.go @@ -565,10 +565,12 @@ func (d Decimal) Div64(v uint64) (Decimal, error) { if !d.coef.overflow() { d256 := d.coef.u128.MulToU256(pow10[defaultPrec-d.prec]) - quo, _, err := d256.quoRem64Tou128(v) + quo, _, err := d256.div192by64(v) if err == nil { return newDecimal(d.neg, bintFromU128(quo), defaultPrec), nil } + + // overflow, try with *big.Int } // overflow, try with *big.Int diff --git a/decimal_test.go b/decimal_test.go index 8ae659c..b52b097 100644 --- a/decimal_test.go +++ b/decimal_test.go @@ -859,9 +859,13 @@ func TestDiv(t *testing.T) { overflow bool wantErr error }{ + {"22773757910726981402256170801141121114", "811656739243220271.159", false, nil}, + {"22773757910726981402256170801141121024", "2277375793122336353220649475.264577813", false, nil}, + {"2345678901234567899", "1234567890123456789.1234567890123456789", false, nil}, + {"123456.123", "8796093022208", false, nil}, {"1844674407370955161.5999999999", "18446744073709551616", false, nil}, {"1000000000000", "0.0000001", false, nil}, - {"479615345916448342049", "1494.186269970473681015", true, nil}, + {"479615345916448342049", "1494.186269970473681015", false, nil}, {"123456.1234567890123456789", "234567.1234567890123456789", false, nil}, {"123456.1234567890123456789", "1", false, nil}, {"-123456.1234567890123456789", "234567.1234567890123456789", false, nil}, @@ -874,7 +878,6 @@ func TestDiv(t *testing.T) { {"123456789012345678.9", "0.1", false, nil}, {"1111111111111", "1111.123456789123456789", false, nil}, {"123456789", "1.1234567890123456789", false, nil}, - {"2345678901234567899", "1234567890123456789.1234567890123456789", false, nil}, {"0.1234567890123456789", "0.04586201546101", false, nil}, {"1", "1111.123456789123456789", false, nil}, {"1", "1.123456789123456789", false, nil}, @@ -2318,6 +2321,14 @@ func TestPrecUint(t *testing.T) { require.Equal(t, oneUnit.prec, b.PrecUint()) } }) + } +} + +func BenchmarkDiv(b *testing.B) { + a := MustParse("22773757910726981402256170801141121114") + bb := MustParse("811656739243220271.159") + for i := 0; i < b.N; i++ { + _, _ = a.Div(bb) } } diff --git a/testdata/fuzz/FuzzDivDec/d1ac6afed7c33047 b/testdata/fuzz/FuzzDivDec/d1ac6afed7c33047 new file mode 100644 index 0000000..c23b6df --- /dev/null +++ b/testdata/fuzz/FuzzDivDec/d1ac6afed7c33047 @@ -0,0 +1,9 @@ +go test fuzz v1 +bool(true) +uint64(1234567890123456789) +uint64(90) +byte('\x00') +bool(true) +uint64(44) +uint64(55) +byte('b') diff --git a/u128.go b/u128.go index dbcc536..ff51d03 100644 --- a/u128.go +++ b/u128.go @@ -8,6 +8,7 @@ import ( var ( one128 = u128{lo: 1} + max128 = u128{hi: ^uint64(0), lo: ^uint64(0)} ) // u128 (big unsigned-integer) is a 128-bits unsigned integer @@ -72,14 +73,6 @@ func (u u128) Cmp64(v uint64) int { } } -func (u u128) LessThan(v u128) bool { - if u.hi < v.hi || (u.hi == v.hi && u.lo < v.lo) { - return true - } - - return false -} - func (u u128) Add(v u128) (u128, error) { lo, carry := bits.Add64(u.lo, v.lo, 0) hi, carry := bits.Add64(u.hi, v.hi, carry) @@ -283,36 +276,44 @@ func (u u128) ToBigInt() *big.Int { // getTrailingZeros64 returns the number of trailing zeros in u // NOTE: this only works when maxPrec is 19 func getTrailingZeros64(u uint64) uint8 { - var z uint8 + var zeros uint8 + + // max scale is 19, so we can start from 16 if u%1e16 == 0 { - z = 16 + zeros += 16 + u /= 1e16 - if u%pow10[z+2].lo == 0 { - z += 2 + // short path, because we know that max scale is 19 + if u%100 == 0 { + zeros += 2 + u /= 100 } - if u%pow10[z+1].lo == 0 { - z++ + if u%10 == 0 { + zeros++ } - return z + return zeros } - if u%pow10[8].lo == 0 { - z += 8 + if u%1e8 == 0 { + zeros += 8 + u /= 1e8 } - if u%pow10[z+4].lo == 0 { - z += 4 + if u%1e4 == 0 { + zeros += 4 + u /= 1e4 } - if u%pow10[z+2].lo == 0 { - z += 2 + if u%100 == 0 { + zeros += 2 + u /= 100 } - if u%pow10[z+1].lo == 0 { - z++ + if u%10 == 0 { + zeros++ } - return z + return zeros } diff --git a/u256.go b/u256.go index 81687f3..046796c 100644 --- a/u256.go +++ b/u256.go @@ -55,19 +55,6 @@ func (u u256) bitLen() int { // return s // } -// Compare 2 u256, returns: -// -// +1 when u > v -// 0 when u = v -// -1 when u < v -func (u u256) cmp(v u256) int { - if k := u.carry.Cmp(v.carry); k != 0 { - return k - } - - return u128FromHiLo(u.hi, u.lo).Cmp(u128FromHiLo(v.hi, v.lo)) -} - // Compare u256 and U128, returns: // // +1 when u > v @@ -133,220 +120,126 @@ func (u u256) mul128(v u128) (u256, error) { return u256{hi: a.hi, lo: a.lo, carry: c}, nil } -func (u u256) sub(v u256) (u256, error) { - lo, borrow := bits.Sub64(u.lo, v.lo, 0) - hi, borrow := bits.Sub64(u.hi, v.hi, borrow) - - c, err := v.carry.Add64(borrow) - if err != nil { - return u256{}, err +// fastQuo only returns quotient of u/v +func (u u256) fastQuo(v u128) (u128, error) { + if u.carry.IsZero() { + q, _, err := u128FromHiLo(u.hi, u.lo).QuoRem(v) + return q, err } - c1, err := u.carry.Sub(c) - if err != nil { - return u256{}, err + if v.hi == 0 && u.carry.hi == 0 { + q, _, err := u.div192by64(v.lo) + return q, err } - return u256{lo: lo, hi: hi, carry: c1}, nil -} - -func (u u256) rsh(n uint) (v u256) { - switch { - case n < 64: - v.carry = u.carry.Rsh(n) - v.hi = u.carry.lo<<(64-n) | u.hi>>n - v.lo = u.hi<<(64-n) | u.lo>>n - - case 64 <= n && n < 128: - v.carry.hi = 0 - v.carry.lo = u.carry.hi >> (n - 64) - v.hi = u.carry.hi<<(128-n) | u.carry.lo>>(n-64) - v.lo = u.carry.lo<<(128-n) | u.hi>>(n-64) - - case n >= 128: - v.carry = u128{} - c := u128{hi: u.carry.hi, lo: u.carry.lo}.Rsh(n - 128) - v.hi, v.lo = c.hi, c.lo - default: - // n < 0, can't happen + // now we have u192 / u128 or u256 / u128 + if u.carry.Cmp(v) >= 0 { + // obviously the result won't fit into u128 + return u128{}, errOverflow } - return + return u.div256by128(v), nil } -// Quo only returns quotient of u/v -// Fast divsion for U192 divided by U128 using Hacker's Delight multiword division algorithm -// with some constraints regarding max coef and prec value, including: -// -// max(coef) = 2^128-1 -// max(prec) = 19 -// max(u) = 2^192-1 -func (u u256) fastQuo(v u128) (u128, error) { - // if u >= 2^192, the quotient might won't fit in 128-bits number (overflow). - if u.carry.hi != 0 { - return u128{}, errOverflow +// div192by64 return q,r which: +// q must be a u128 +// u = q*v + r +// Returns error if u.carry >= v, because the result can't fit into u128 +func (u u256) div192by64(v uint64) (u128, uint64, error) { + if u.carry.Cmp64(v) >= 0 { + return u128{}, 0, errOverflow } - if u.carry.IsZero() { - q, _, err := u128FromHiLo(u.hi, u.lo).QuoRem(v) - return q, err - } + // can't panic because we already check u.carry < v (u.carry.hi == 0 && u.carry.lo < v) + hi, rem := bits.Div64(u.carry.lo, u.hi, v) - if v.hi == 0 { - q, _, err := u.quoRem64Tou128(v.lo) - return q, err - } + // can't panic because rem < v + lo, r := bits.Div64(rem, u.lo, v) + return u128FromHiLo(hi, lo), r, nil +} - // Let q be the final quotient and tq be the 'trial quotient' - // The trial quotient tq is defined as tq = q + k, where k is a correction factor. - // Here's how we determine k using the following steps: - - // 1. Compute the trial quotient tq as tq = [u1 / v1], where: - // - u1 = [u / 2^(64 - shift)] - // - v1 = [v / 2^(64 - shift)] - // (This follows the Hacker's Delight multiword division algorithm) - // 2. Calculate vq = v * tq - // 3. If vq <= u, then q = tq - // 4. If vq > u, compute the difference vqu = vq - u, which can be expressed as: - // vqu = v * (q + k) - (vq + r) = v * k - r = v * (k1 + 1) - r = v * k1 + v - r - // 5. Determine k1 as k1 = [vqu / v] and adjust k1 if necessary - // - // However, given that u < 2^192 and v < 2^128, and 0 <= k <= 2^64, it's possible for v * k to exceed 2^128, - // causing an overflow in vqu. - // To mitigate this, we need to find the minimum k. - // If even the minimum k leads to v * k > 2^128, we fall back to big.Int division - // due to the lack of a fast algorithm for dividing U192 by U128. - // - // tq = [u1 / v1] = [u / (v - rem(v, 2^(64 - n))] - // The minimum k is achieved when tq is minimized, which happens when rem(v, 2^(64 - n)) is minimized, - // --> 2^(64 - n) being minimized --> n should be maximized. - // n is the number of leading zeros in v, so 0 <= n <= 63. - // Technically, using n = 63 provides the optimal k. - - // However, when n = 63: - // - u1 = [u / 2^(64 - 63)] = u >> 1 - // - v1 = [v / 2^(64 - 63)] = v >> 1 - // And if u1 is U192 and v1 is U128, we cannot find tq = [u1 / v1], - // since there's no U192/U128 division algorithm currently available. - // - // What we do have are fast algorithms for U192/U64 or U128/U128 division. - // Therefore, we can only compute tq by adjusting u and v to fit either U128/U128 or U192/U64. - // This might not be the best optimization, but it's the best we can achieve for now. - // If we later find a fast U192/U128 division algorithm, we can improve this process. - // - // As previously mentioned, if after finding the minimum k, v * k still exceeds 2^128, we will fall back to big.Int division. +// div256by128 performs u256 / u128, which u256.carry < u128 +// This implementation is based on divllu from https://github.com/ridiculousfish/libdivide +// The algorithm is explained in this blog post: https://ridiculousfish.com/blog/posts/labor-of-division-episode-iv.html +func (u u256) div256by128(v u128) u128 { + // normalize v + n := bits.LeadingZeros64(v.hi) // nolint: gosec - n := uint(bits.LeadingZeros64(v.hi)) + v = v.Lsh(uint(n)) - // nolint: gosec - m := uint(bits.LeadingZeros64(u.carry.lo)) - - var ( - v1, tq u128 - u1 u256 - err error - ) - - if n >= m { - v1 = v.Lsh(n) - u1 = u.rsh(64 - n) - tq, _, err = u1.quoRem64Tou128(v1.hi) - if err != nil { - return u128{}, err - } - } else { - // n < m - v1 = v.Rsh(64 - m) - u1 = u.rsh(64 - m) + // shift u to the left by n bits (n < 64) + a := [4]uint64{} + a[0] = u.lo << n + a[1] = u.lo>>(64-n) | u.hi<>(64-n) | u.carry.lo<>(64-n) | u.carry.hi< v.hi) { + aLen = 4 } - vq := v.MulToU256(tq) - - // let k = 1 + [(u*rem(v, 2^(64-n))) / (v*(v-rem(v, 2^(64-n)))] - // vq = v*tq = v(q + k) - if vq.cmp(u) <= 0 { - // vq <= u means tq = q - return tq, nil - } + q := [2]uint64{} - // vqu = vq - u = v*(q+k) - (vq + r) = v*k - r - vqu, err := vq.sub(u) - if err != nil { - return u128{}, err - } + for i := aLen - 3; i >= 0; i-- { + u2, u1, u0 := a[i+2], a[i+1], a[i] - if !vqu.carry.IsZero() { - // v * k > 2^128, we can't find k - // fall back to big.Int division - return u128{}, errOverflow - } + // trial quotient tq = [u2,u1,u0] / v ~= [u2,u1] / v.hi + // tq <= q + 2 + tq, r := bits.Div64(u2, u1, v.hi) - vqu128 := u128FromHiLo(vqu.hi, vqu.lo) + c1h, c1l := bits.Mul64(tq, v.lo) + c1 := u128{hi: c1h, lo: c1l} + c2 := u128{hi: r, lo: u0} - // k1 = k - 1 - // vqu = v*k - r = v*(k1 + 1) - r = v*k1 + v - r - // k1 <= [vqu / v] <= k1 + 1 - k1, _, err := vqu128.QuoRem(v) - if err != nil { - return u128{}, err - } + // adjust tq + var k uint64 + if c1.Cmp(c2) > 0 { + k = 1 - // adjust k1 - vqu1, err := v.Mul(k1) - if err != nil { - return u128{}, err - } + // d = c1 - c2 + if subUnsafe(c1, c2).Cmp(v) > 0 { + k = 2 + } + } - // if [vqu / v] = k1 + 1, then we don't have to adjust because final k = k1 + 1 - // if [vqu / v] = k1, then final k = k1 + 1 - if vqu1.Cmp(vqu128) < 0 { - k1, err = k1.Add64(1) - if err != nil { - return u128{}, err + q[i] = tq - k + + // true remainder rem = [u2,u1,u0] - q*v = c2 - c1 + k*v (k <= 2) + var rem u128 + switch k { + case 0: + // rem = c2 - c1 + rem = subUnsafe(c2, c1) + case 1: + // rem = c2 - c1 + v = v - (c1 - c2) with c1 > c2 + rem = subUnsafe(c1, c2) + rem = subUnsafe(v, rem) + case 2: + // rem = c2 - c1 + 2*v = v + v - (c1 - c2) with c1 > c2 + // v = max(u128) - not(v) + // --> rem = v - not(v) + max(u128) - (c1 - c2) + // v >= not(v) because v is normalized. Hence, we can safely caculate rem without checking overflow + c12 := subUnsafe(c1, c2) + c12 = subUnsafe(max128, c12) + rem = subUnsafe(v, u128{hi: ^v.hi, lo: ^v.lo}) + + // this also can't overflow because rem < v <= max(u128) + rem, _ = rem.Add(c12) } - } - // final q = tq - k - tq, err = tq.Sub(k1) - if err != nil { - return u128{}, err + a[i+1], a[i] = rem.hi, rem.lo } - // we don't really need the remainder, might un-comment later if needed - // r, err := v.Sub(r1) - // if err != nil { - // return u128{}, u128{}, err - // } - - return tq, nil + return u128{hi: q[1], lo: q[0]} } -// quoRem64Tou128 return q,r which: -// -// q must be a u128 -// u = q*v + r -// Return overflow if the result q doesn't fit in a u128 -func (u u256) quoRem64Tou128(v uint64) (u128, uint64, error) { - if u.carry.lo == 0 { - q, r := u128FromHiLo(u.hi, u.lo).QuoRem64(v) - return q, r, nil - } - - quo, rem := u128FromHiLo(u.carry.lo, u.hi).QuoRem64(v) - if quo.hi != 0 { - return u128{}, 0, errOverflow - } - - hi := quo.lo - - // can't panic because rem < v - lo, r := bits.Div64(rem, u.lo, v) - - return u128FromHiLo(hi, lo), r, nil +// subUnsafe returns u - v with u >= v +// must be called only when u >= v or the result will be incorrect +func subUnsafe(u, v u128) u128 { + lo, borrow := bits.Sub64(u.lo, v.lo, 0) + hi, _ := bits.Sub64(u.hi, v.hi, borrow) + return u128{hi: hi, lo: lo} } diff --git a/u256_test.go b/u256_test.go index e7e45ca..932b2d8 100644 --- a/u256_test.go +++ b/u256_test.go @@ -41,73 +41,3 @@ func TestBitlen(t *testing.T) { }) } } - -func TestRsh(t *testing.T) { - testcases := []struct { - u u256 - shift uint - }{ - { - u: u256{hi: 1234567890123456, lo: 1234567890123456, carry: u128{hi: 1234567890123456, lo: 1234567890123456}}, - shift: 49, - }, - { - u: u256{hi: 1234567890123456, lo: 1234567890123456, carry: u128{hi: 1234567890123456, lo: 1234567890123456}}, - shift: 64, - }, - { - u: u256{hi: 1234567890123456, lo: 1234567890123456, carry: u128{hi: 1234567890123456, lo: 1234567890123456}}, - shift: 113, - }, - { - u: u256{hi: 1234567890123456, lo: 1234567890123456, carry: u128{hi: 1234567890123456, lo: 1234567890123456}}, - shift: 157, - }, - - { - u: u256{hi: 1234567890123456, lo: 1234567890123456, carry: u128{hi: 1234567890123456, lo: 1234567890123456}}, - shift: 212, - }, - } - - for i, tc := range testcases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { - u := tc.u - v := u.rsh(tc.shift) - - if tc.shift <= 64 { - a := u128{hi: u.hi, lo: u.lo}.Rsh(tc.shift) - require.Equal(t, a.lo, v.lo) - - b := u128{hi: u.carry.lo, lo: u.hi}.Rsh(tc.shift) - require.Equal(t, b.lo, v.hi) - - c := u128{hi: u.carry.hi, lo: u.carry.lo}.Rsh(tc.shift) - require.Equal(t, c, v.carry) - return - } - - if tc.shift <= 128 { - a := u128{hi: u.carry.lo, lo: u.hi}.Rsh(tc.shift - 64) - require.Equal(t, a.lo, v.lo) - - b := u128{hi: u.carry.hi, lo: u.carry.lo}.Rsh(tc.shift - 64) - require.Equal(t, b.lo, v.hi) - - c := u128{hi: 0, lo: u.carry.hi}.Rsh(tc.shift - 64) - require.Equal(t, c, v.carry) - return - } - - a := u128{hi: u.carry.hi, lo: u.carry.lo}.Rsh(tc.shift - 128) - require.Equal(t, a.lo, v.lo) - require.Equal(t, a.hi, v.hi) - require.Equal(t, u128{}, v.carry) - }) - } -} - -// 0000000000000100011000101101010100111100100010101011101011000000. -// 0000000000000100011000101101010100111100100010101011101011000000. -// 0000000000000100011000101101010100111100100010101011101011000000. -// 0000000000000100011000101101010100111100100010101011101011000000