From b3f0649fc937724ef67835036f5eb0642a843ea2 Mon Sep 17 00:00:00 2001
From: Alberto Donizetti <alb.donizetti@gmail.com>
Date: Sun, 11 Sep 2016 11:55:48 +0200
Subject: [PATCH] sqrt.go: remove allocations from Newton functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

name               old time/op    new time/op    delta
Exp2Prec53-4         69.4µs ± 4%    61.0µs ± 1%  -12.09%          (p=0.008 n=5+5)
Exp2Prec100-4         154µs ± 0%     137µs ± 0%  -11.39%          (p=0.008 n=5+5)
Exp2Prec1000-4       1.07ms ± 0%    0.98ms ± 0%   -7.91%          (p=0.008 n=5+5)
Exp2Prec10000-4      14.5ms ± 0%    14.3ms ± 0%   -0.97%          (p=0.008 n=5+5)
Exp2Prec100000-4      426ms ± 0%     426ms ± 0%     ~             (p=0.222 n=5+5)
Log2Prec53-4         63.1µs ± 0%    56.4µs ± 0%  -10.63%          (p=0.008 n=5+5)
Log2Prec100-4        69.9µs ± 0%    64.7µs ± 3%   -7.44%          (p=0.008 n=5+5)
Log2Prec1000-4        216µs ± 0%     196µs ± 1%   -9.02%          (p=0.008 n=5+5)
Log2Prec10000-4      3.65ms ± 0%    3.60ms ± 0%   -1.33%          (p=0.008 n=5+5)
Log2Prec100000-4      134ms ± 2%     133ms ± 0%     ~             (p=0.151 n=5+5)
Sqrt2Prec53-4        1.16µs ± 0%    1.16µs ± 0%     ~             (p=0.087 n=5+5)
Sqrt2Prec100-4       2.38µs ± 4%    2.27µs ± 5%     ~             (p=0.127 n=5+5)
Sqrt2Prec1000-4      8.47µs ± 0%    7.28µs ± 0%  -14.07%          (p=0.008 n=5+5)
Sqrt2Prec10000-4     51.4µs ± 1%    49.6µs ± 0%   -3.43%          (p=0.008 n=5+5)
Sqrt2Prec100000-4     972µs ± 0%     974µs ± 0%     ~             (p=0.151 n=5+5)

name               old allocs/op  new allocs/op  delta
Exp2Prec53-4            593 ± 0%       461 ± 0%  -22.26%          (p=0.008 n=5+5)
Exp2Prec100-4         1.28k ± 0%     0.99k ± 0%  -22.46%          (p=0.008 n=5+5)
Exp2Prec1000-4        5.86k ± 0%     4.35k ± 0%  -25.76%          (p=0.008 n=5+5)
Exp2Prec10000-4       13.5k ± 0%      9.9k ± 0%  -26.90%          (p=0.008 n=5+5)
Exp2Prec100000-4      27.0k ± 0%     19.7k ± 0%  -27.19%          (p=0.008 n=5+5)
Log2Prec53-4            539 ± 0%       419 ± 0%  -22.26%          (p=0.008 n=5+5)
Log2Prec100-4           590 ± 0%       458 ± 0%  -22.37%          (p=0.008 n=5+5)
Log2Prec1000-4        1.24k ± 0%     0.90k ± 0%  -27.53%          (p=0.008 n=5+5)
Log2Prec10000-4       2.70k ± 0%     1.96k ± 0%  -27.28%          (p=0.008 n=5+5)
Log2Prec100000-4      5.21k ± 0%     3.89k ± 0%  -25.35%          (p=0.008 n=5+5)
Sqrt2Prec53-4          14.0 ± 0%      14.0 ± 0%     ~     (all samples are equal)
Sqrt2Prec100-4         23.0 ± 0%      21.0 ± 0%   -8.70%          (p=0.008 n=5+5)
Sqrt2Prec1000-4        63.0 ± 0%      43.0 ± 0%  -31.75%          (p=0.008 n=5+5)
Sqrt2Prec10000-4       92.0 ± 0%      60.0 ± 0%  -34.78%          (p=0.008 n=5+5)
Sqrt2Prec100000-4       128 ± 0%        84 ± 0%  -34.38%          (p=0.008 n=5+5)
---
 sqrt.go | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/sqrt.go b/sqrt.go
index 6f96830..eb20201 100644
--- a/sqrt.go
+++ b/sqrt.go
@@ -1,5 +1,5 @@
-// Package floats provides the implementation of a few additional operations for the
-// standard library big.Float type.
+// Package floats provides the implementation of a few additional
+// operations for the standard library big.Float type.
 package floats
 
 import (
@@ -7,9 +7,10 @@ import (
 	"math/big"
 )
 
-// Sqrt returns a big.Float representation of the square root of z. Precision is
-// the same as the one of the argument. The function panics if z is negative, returns ±0
-// when z = ±0, and +Inf when z = +Inf.
+// Sqrt returns a big.Float representation of the square root of
+// z. Precision is the same as the one of the argument. The function
+// panics if z is negative, returns ±0 when z = ±0, and +Inf when z =
+// +Inf.
 func Sqrt(z *big.Float) *big.Float {
 
 	// panic on negative z
@@ -32,9 +33,8 @@ func Sqrt(z *big.Float) *big.Float {
 	//   √(2a)·2**b/2      if b > 0 is odd
 	//   √(0.5a)·2**b/2    if b < 0 is odd
 	//
-	// The difference in the odd exponent case is due
-	// to the fact that exp/2 is rounded in different
-	// directions when exp is negative.
+	// The difference in the odd exponent case is due to the fact that
+	// exp/2 is rounded in different directions when exp is negative.
 	mant := new(big.Float)
 	exp := z.MantExp(mant)
 	switch exp % 2 {
@@ -44,12 +44,13 @@ func Sqrt(z *big.Float) *big.Float {
 		mant.Mul(big.NewFloat(0.5), mant)
 	}
 
-	// Solving x² - z = 0 directly requires a Quo
-	// call, but it's faster for small precisions.
-	// Solvin 1/x² - z = 0 avoids the Quo call and
-	// is much faster for high precisions.
-	// Use sqrtDirect for prec <= 128 and
-	// sqrtInverse for prec > 128.
+	// Solving x² - z = 0 directly requires a Quo call, but it's
+	// faster for small precisions.
+	//
+	// Solving 1/x² - z = 0 avoids the Quo call and is much faster for
+	// high precisions.
+	//
+	// Use sqrtDirect for prec <= 128 and sqrtInverse for prec > 128.
 	var x *big.Float
 	if z.Prec() <= 128 {
 		x = sqrtDirect(mant)
@@ -66,10 +67,11 @@ func Sqrt(z *big.Float) *big.Float {
 // t² - z = 0 for t
 func sqrtDirect(z *big.Float) *big.Float {
 	// f(t)/f'(t) = 0.5(t² - z)/t
+	half := big.NewFloat(0.5)
 	f := func(t *big.Float) *big.Float {
 		x := new(big.Float).Mul(t, t) // x = t²
 		x.Sub(x, z)                   // x = t² - z
-		x.Mul(big.NewFloat(0.5), x)   // x = 0.5(t² - z)
+		x.Mul(half, x)                // x = 0.5(t² - z)
 		return x.Quo(x, t)            // return x = 0.5(t² - z)/t
 	}
 
@@ -84,13 +86,15 @@ func sqrtDirect(z *big.Float) *big.Float {
 // 1/t² - z = 0 for x and then inverting.
 func sqrtInverse(z *big.Float) *big.Float {
 	// f(t)/f'(t) = -0.5t(1 - zt²)
+	nhalf := big.NewFloat(-0.5)
+	one := big.NewFloat(1)
 	f := func(t *big.Float) *big.Float {
 		u := new(big.Float)
 		u.Mul(t, t)                     // u = t²
 		u.Mul(u, z)                     // u = zt²
-		u.Sub(big.NewFloat(1), u)       // u = 1 - zt²
-		u.Mul(u, big.NewFloat(-0.5))    // u = 0.5(1 - zt²)
-		return new(big.Float).Mul(t, u) // x = 0.5t(1 - zt²)
+		u.Sub(one, u)                   // u = 1 - zt²
+		u.Mul(u, nhalf)                 // u = -0.5(1 - zt²)
+		return new(big.Float).Mul(t, u) // x = -0.5t(1 - zt²)
 	}
 
 	// initial guess