| 
 | 1 | +// Calculate the inverse of x using the Newton-Raphson method.  | 
 | 2 | +static VALUE  | 
 | 3 | +newton_raphson_inverse(VALUE x, size_t prec) {  | 
 | 4 | +    BDVALUE bdone = NewZeroWrap(1, 1);  | 
 | 5 | +    VpSetOne(bdone.real);  | 
 | 6 | +    VALUE one = bdone.bigdecimal;  | 
 | 7 | + | 
 | 8 | +    // Initial approximation in 2 digits  | 
 | 9 | +    BDVALUE bdx = GetBDValueMust(x);  | 
 | 10 | +    BDVALUE inv0 = NewZeroWrap(1, 2 * BIGDECIMAL_COMPONENT_FIGURES);  | 
 | 11 | +    VpSetOne(inv0.real);  | 
 | 12 | +    DECDIG_DBL numerator = (DECDIG_DBL)BIGDECIMAL_BASE * 100;  | 
 | 13 | +    DECDIG_DBL denominator = (DECDIG_DBL)bdx.real->frac[0] * 100 + (DECDIG_DBL)(bdx.real->Prec >= 2 ? bdx.real->frac[1] : 0) * 100 / BIGDECIMAL_BASE;  | 
 | 14 | +    inv0.real->frac[0] = (DECDIG)(numerator / denominator);  | 
 | 15 | +    inv0.real->frac[1] = (DECDIG)((numerator % denominator) * (BIGDECIMAL_BASE / 100) / denominator * 100);  | 
 | 16 | +    inv0.real->Prec = 2;  | 
 | 17 | +    inv0.real->exponent = 1 - bdx.real->exponent;  | 
 | 18 | +    VpNmlz(inv0.real);  | 
 | 19 | +    RB_GC_GUARD(bdx.bigdecimal);  | 
 | 20 | +    VALUE inv = inv0.bigdecimal;  | 
 | 21 | + | 
 | 22 | +    int bl = 1;  | 
 | 23 | +    while (((size_t)1 << bl) < prec) bl++;  | 
 | 24 | + | 
 | 25 | +    for (int i = bl; i >= 0; i--) {  | 
 | 26 | +        size_t n = (prec >> i) + 2;  | 
 | 27 | +        if (n > prec) n = prec;  | 
 | 28 | +        // Newton-Raphson iteration: inv_next = inv + inv * (1 - x * inv)  | 
 | 29 | +        VALUE one_minus_x_inv = BigDecimal_sub2(  | 
 | 30 | +            one,  | 
 | 31 | +            BigDecimal_mult(BigDecimal_mult2(x, one, SIZET2NUM(n + 1)), inv),  | 
 | 32 | +            SIZET2NUM(SIZET2NUM(n / 2))  | 
 | 33 | +        );  | 
 | 34 | +        inv = BigDecimal_add2(  | 
 | 35 | +            inv,  | 
 | 36 | +            BigDecimal_mult(inv, one_minus_x_inv),  | 
 | 37 | +            SIZET2NUM(n)  | 
 | 38 | +        );  | 
 | 39 | +    }  | 
 | 40 | +    return inv;  | 
 | 41 | +}  | 
 | 42 | + | 
 | 43 | +// Calculates divmod by multiplying approximate reciprocal of y  | 
 | 44 | +static void  | 
 | 45 | +divmod_by_inv_mul(VALUE x, VALUE y, VALUE inv, VALUE *res_div, VALUE *res_mod) {  | 
 | 46 | +    VALUE div = BigDecimal_fix(BigDecimal_mult(x, inv));  | 
 | 47 | +    VALUE mod = BigDecimal_sub(x, BigDecimal_mult(div, y));  | 
 | 48 | +    while (RTEST(BigDecimal_lt(mod, INT2FIX(0)))) {  | 
 | 49 | +        mod = BigDecimal_add(mod, y);  | 
 | 50 | +        div = BigDecimal_sub(div, INT2FIX(1));  | 
 | 51 | +    }  | 
 | 52 | +    while (RTEST(BigDecimal_ge(mod, y))) {  | 
 | 53 | +        mod = BigDecimal_sub(mod, y);  | 
 | 54 | +        div = BigDecimal_add(div, INT2FIX(1));  | 
 | 55 | +    }  | 
 | 56 | +    *res_div = div;  | 
 | 57 | +    *res_mod = mod;  | 
 | 58 | +}  | 
 | 59 | + | 
 | 60 | +static void  | 
 | 61 | +slice_copy(DECDIG *dest, Real *src, size_t rshift, size_t length) {  | 
 | 62 | +    ssize_t start = src->exponent - rshift - length;  | 
 | 63 | +    if (start >= (ssize_t)src->Prec) return;  | 
 | 64 | +    if (start < 0) {  | 
 | 65 | +        dest -= start;  | 
 | 66 | +        length += start;  | 
 | 67 | +        start = 0;  | 
 | 68 | +    }  | 
 | 69 | +    size_t max_length = src->Prec - start;  | 
 | 70 | +    memcpy(dest, src->frac + start, Min(length, max_length) * sizeof(DECDIG));  | 
 | 71 | +}  | 
 | 72 | + | 
 | 73 | +/* Calculates divmod using Newton-Raphson method.  | 
 | 74 | + * x and y must be a BigDecimal representing an integer value.  | 
 | 75 | + *  | 
 | 76 | + * To calculate with low cost, we need to split x into blocks and perform divmod for each block.  | 
 | 77 | + * x_digits = remaining_digits(<= y_digits) + block_digits * num_blocks  | 
 | 78 | + *  | 
 | 79 | + * Example:  | 
 | 80 | + * xxx_xxxxx_xxxxx_xxxxx(18 digits) / yyyyy(5 digits)  | 
 | 81 | + * remaining_digits = 3, block_digits = 5, num_blocks = 3  | 
 | 82 | + * repeating xxxxx_xxxxxx.divmod(yyyyy) calculation 3 times.  | 
 | 83 | + *  | 
 | 84 | + * In each divmod step, dividend is at most (y_digits + block_digits) digits and divisor is y_digits digits.  | 
 | 85 | + * Reciprocal of y needs block_digits + 1 precision.  | 
 | 86 | + */  | 
 | 87 | +static void  | 
 | 88 | +divmod_newton(VALUE x, VALUE y, VALUE *div_out, VALUE *mod_out) {  | 
 | 89 | +    size_t x_digits = NUM2SIZET(BigDecimal_exponent(x));  | 
 | 90 | +    size_t y_digits = NUM2SIZET(BigDecimal_exponent(y));  | 
 | 91 | +    if (x_digits <= y_digits) x_digits = y_digits + 1;  | 
 | 92 | + | 
 | 93 | +    size_t n = x_digits / y_digits;  | 
 | 94 | +    size_t block_figs = (x_digits - y_digits) / n / BIGDECIMAL_COMPONENT_FIGURES + 1;  | 
 | 95 | +    size_t block_digits = block_figs * BIGDECIMAL_COMPONENT_FIGURES;  | 
 | 96 | +    size_t num_blocks = (x_digits - y_digits + block_digits - 1) / block_digits;  | 
 | 97 | +    size_t y_figs = (y_digits - 1) / BIGDECIMAL_COMPONENT_FIGURES + 1;  | 
 | 98 | +    VALUE yinv = newton_raphson_inverse(y, block_digits + 1);  | 
 | 99 | + | 
 | 100 | +    BDVALUE divident = NewZeroWrap(1, BIGDECIMAL_COMPONENT_FIGURES * (y_figs + block_figs));  | 
 | 101 | +    BDVALUE div_result = NewZeroWrap(1, BIGDECIMAL_COMPONENT_FIGURES * (num_blocks * block_figs + 1));  | 
 | 102 | +    BDVALUE bdx = GetBDValueMust(x);  | 
 | 103 | + | 
 | 104 | +    VALUE mod = BigDecimal_fix(BigDecimal_decimal_shift(x, SSIZET2NUM(-num_blocks * block_digits)));  | 
 | 105 | +    for (ssize_t i = num_blocks - 1; i >= 0; i--) {  | 
 | 106 | +        memset(divident.real->frac, 0, (y_figs + block_figs) * sizeof(DECDIG));  | 
 | 107 | + | 
 | 108 | +        BDVALUE bdmod = GetBDValueMust(mod);  | 
 | 109 | +        slice_copy(divident.real->frac, bdmod.real, 0, y_figs);  | 
 | 110 | +        slice_copy(divident.real->frac + y_figs, bdx.real, i * block_figs, block_figs);  | 
 | 111 | +        RB_GC_GUARD(bdmod.bigdecimal);  | 
 | 112 | + | 
 | 113 | +        VpSetSign(divident.real, 1);  | 
 | 114 | +        divident.real->exponent = y_figs + block_figs;  | 
 | 115 | +        divident.real->Prec = y_figs + block_figs;  | 
 | 116 | +        VpNmlz(divident.real);  | 
 | 117 | + | 
 | 118 | +        VALUE div;  | 
 | 119 | +        divmod_by_inv_mul(divident.bigdecimal, y, yinv, &div, &mod);  | 
 | 120 | +        BDVALUE bddiv = GetBDValueMust(div);  | 
 | 121 | +        slice_copy(div_result.real->frac + (num_blocks - i - 1) * block_figs, bddiv.real, 0, block_figs + 1);  | 
 | 122 | +        RB_GC_GUARD(bddiv.bigdecimal);  | 
 | 123 | +    }  | 
 | 124 | +    VpSetSign(div_result.real, 1);  | 
 | 125 | +    div_result.real->exponent = num_blocks * block_figs + 1;  | 
 | 126 | +    div_result.real->Prec = num_blocks * block_figs + 1;  | 
 | 127 | +    VpNmlz(div_result.real);  | 
 | 128 | +    RB_GC_GUARD(bdx.bigdecimal);  | 
 | 129 | +    RB_GC_GUARD(divident.bigdecimal);  | 
 | 130 | +    RB_GC_GUARD(div_result.bigdecimal);  | 
 | 131 | +    *div_out = div_result.bigdecimal;  | 
 | 132 | +    *mod_out = mod;  | 
 | 133 | +}  | 
 | 134 | + | 
 | 135 | +static VALUE  | 
 | 136 | +VpDivdNewtonInner(VALUE args_ptr)  | 
 | 137 | +{  | 
 | 138 | +    Real **args = (Real**)args_ptr;  | 
 | 139 | +    Real *c = args[0], *r = args[1], *a = args[2], *b = args[3];  | 
 | 140 | +    BDVALUE a2, b2, c2, r2;  | 
 | 141 | +    VALUE div, mod, a2_frac = Qnil;  | 
 | 142 | +    size_t div_prec = c->MaxPrec - 1;  | 
 | 143 | +    size_t base_prec = b->Prec;  | 
 | 144 | + | 
 | 145 | +    a2 = NewZeroWrap(1, a->Prec * BIGDECIMAL_COMPONENT_FIGURES);  | 
 | 146 | +    b2 = NewZeroWrap(1, b->Prec * BIGDECIMAL_COMPONENT_FIGURES);  | 
 | 147 | +    VpAsgn(a2.real, a, 1);  | 
 | 148 | +    VpAsgn(b2.real, b, 1);  | 
 | 149 | +    VpSetSign(a2.real, 1);  | 
 | 150 | +    VpSetSign(b2.real, 1);  | 
 | 151 | +    a2.real->exponent = base_prec + div_prec;  | 
 | 152 | +    b2.real->exponent = base_prec;  | 
 | 153 | + | 
 | 154 | +    if ((ssize_t)a2.real->Prec > a2.real->exponent) {  | 
 | 155 | +        a2_frac = BigDecimal_frac(a2.bigdecimal);  | 
 | 156 | +        VpMidRound(a2.real, VP_ROUND_DOWN, 0);  | 
 | 157 | +    }  | 
 | 158 | +    divmod_newton(a2.bigdecimal, b2.bigdecimal, &div, &mod);  | 
 | 159 | +    if (a2_frac != Qnil) mod = BigDecimal_add(mod, a2_frac);  | 
 | 160 | + | 
 | 161 | +    c2 = GetBDValueMust(div);  | 
 | 162 | +    r2 = GetBDValueMust(mod);  | 
 | 163 | +    VpAsgn(c, c2.real, VpGetSign(a) * VpGetSign(b));  | 
 | 164 | +    VpAsgn(r, r2.real, VpGetSign(a));  | 
 | 165 | +    AddExponent(c, a->exponent);  | 
 | 166 | +    AddExponent(c, -b->exponent);  | 
 | 167 | +    AddExponent(c, -div_prec);  | 
 | 168 | +    AddExponent(r, a->exponent);  | 
 | 169 | +    AddExponent(r, -base_prec - div_prec);  | 
 | 170 | +    RB_GC_GUARD(a2.bigdecimal);  | 
 | 171 | +    RB_GC_GUARD(a2.bigdecimal);  | 
 | 172 | +    RB_GC_GUARD(c2.bigdecimal);  | 
 | 173 | +    RB_GC_GUARD(r2.bigdecimal);  | 
 | 174 | +    return Qnil;  | 
 | 175 | +}  | 
 | 176 | + | 
 | 177 | +static VALUE  | 
 | 178 | +ensure_restore_prec_limit(VALUE limit)  | 
 | 179 | +{  | 
 | 180 | +    VpSetPrecLimit(NUM2SIZET(limit));  | 
 | 181 | +    return Qnil;  | 
 | 182 | +}  | 
 | 183 | + | 
 | 184 | +static void  | 
 | 185 | +VpDivdNewton(Real *c, Real *r, Real *a, Real *b)  | 
 | 186 | +{  | 
 | 187 | +    Real *args[4] = {c, r, a, b};  | 
 | 188 | +    size_t pl = VpGetPrecLimit();  | 
 | 189 | +    VpSetPrecLimit(0);  | 
 | 190 | +    // Ensure restoring prec limit because some methods used in VpDivdNewtonInner may raise an exception  | 
 | 191 | +    rb_ensure(VpDivdNewtonInner, (VALUE)args, ensure_restore_prec_limit, SIZET2NUM(pl));  | 
 | 192 | +}  | 
0 commit comments