Add `{checked_,overflowing_,wrapping_,}div_rem` #11

nlordell · 2022-02-21T17:35:30Z

Since, internally, the udivmod intrinsic computes both the quotient and remainder, it makes sense to expose a member method on {I,U}256 so callers can take advantage of this.

The text was updated successfully, but these errors were encountered:

NCGThompson · 2023-10-26T03:59:19Z

Currently std doesn't have a div_rem function and relies on LLVM to merge / and %. Therefor, we don't have an exact mold to fill.

The most obvious and probably the best way to implement it is to add the methods to *int/api.rs. The only catch is that if std finally implements div_rem at a later date, it may have slightly different function signatures. Another (not mutex) option is to add a num-integer compatibility feature, but this is probably overkill.

Here is an example of a function signature:

pub fn checked_div_rem(&self, rhs: &Self) -> Option<(Self, Self)>

corresponding to the dividend, the divisor, the quotient, and the remainder respectively.

nlordell · 2023-10-26T06:20:20Z

I've kept non-std APIs in uint.rs and int.rs, so I would put it there. the api.rs was really meant to mimic the STD API as much as possible (non-standard functions are likely there by accident and not on purpose).

nlordell · 2023-10-26T06:21:16Z

We can also make a new extra module (or some other name) for this kind of function if you think it is worth it, but at this point, I'd probably just stick it in int.rs and uint.rs.

NCGThompson · 2023-10-27T17:31:05Z

Signed div_euclid has both a / and a % of the same operands.

ethnum-rs/src/int/api.rs

Lines 1894 to 1900 in 1cd9388

    
           pub fn div_euclid(self, rhs: Self) -> Self { 
        
               let q = self / rhs; 
        
               if self % rhs < 0 { 
        
                   return if rhs > 0 { q - 1 } else { q + 1 }; 
        
               } 
        
               q 
        
           }

I wrapped the function in an #[inline(never)] function foo, and did a release build to x86_64 with out LLVM intrinsics. Apparently the compiler is unable to optimize them together to one divrem call. It also looks like it has two different possibilities for panicking, but I'm unsure what they are.. You can find the function calls by skimming for assembly lines that are longer than the others.

This is an example of the usefulness of a divrem function.

Assembly output

ethnum::int::I256::foo:
 push    rbp
 push    r15
 push    r14
 push    r13
 push    r12
 push    rbx
 sub     rsp, 248
 mov     qword, ptr, [rsp, +, 88], rdi
 mov     rdi, qword, ptr, [rsi]
 mov     r8, qword, ptr, [rsi, +, 8]
 mov     r13, qword, ptr, [rsi, +, 24]
 mov     r9, qword, ptr, [rsi, +, 16]
 xorps   xmm0, xmm0
 movups  xmmword, ptr, [rsp, +, 8], xmm0
 xorps   xmm3, xmm3
 movups  xmmword, ptr, [rsp, +, 24], xmm0
 movdqu  xmm0, xmmword, ptr, [rsp, +, 8]
 movdqu  xmm1, xmmword, ptr, [rdx]
 pcmpeqb xmm1, xmm0
 movdqu  xmm0, xmmword, ptr, [rdx, +, 16]
 movdqu  xmm2, xmmword, ptr, [rsp, +, 24]
 pcmpeqb xmm2, xmm0
 pand    xmm2, xmm1
 pmovmskb eax, xmm2
 cmp     eax, 65535
 je      .LBB10_1
 mov     rsi, r13
 shr     rsi, 63
 mov     r12, r13
 sar     r12, 63
 mov     rbp, qword, ptr, [rdx, +, 24]
 mov     rax, rbp
 shr     rax, 63
 mov     qword, ptr, [rsp], rdx
 mov     r14, rbp
 sar     r14, 63
 mov     rcx, qword, ptr, [rsp]
 mov     r15, qword, ptr, [rcx, +, 16]
 xor     r15, r14
 xor     r8, r12
 xor     rdi, r12
 xor     r13, r12
 xor     r9, r12
 mov     qword, ptr, [rsp, +, 176], rsi
 add     r9, rsi
 adc     r13, 0
 sub     rdi, r12
 sbb     r8, r12
 sbb     r9, 0
 sbb     r13, 0
 mov     qword, ptr, [rsp, +, 168], rdi
 mov     qword, ptr, [rsp, +, 96], rdi
 mov     qword, ptr, [rsp, +, 160], r8
 mov     qword, ptr, [rsp, +, 104], r8
 mov     qword, ptr, [rsp, +, 152], r9
 mov     qword, ptr, [rsp, +, 112], r9
 mov     rcx, qword, ptr, [rsp]
 mov     rdx, qword, ptr, [rcx, +, 8]
 xor     rdx, r14
 mov     rcx, qword, ptr, [rsp]
 mov     rbx, qword, ptr, [rcx]
 xor     rbx, r14
 xor     rbp, r14
 add     r15, rax
 adc     rbp, 0
 sub     rbx, r14
 sbb     rdx, r14
 sbb     r15, 0
 mov     qword, ptr, [rsp, +, 120], r13
 sbb     rbp, 0
 mov     qword, ptr, [rsp, +, 8], rbx
 mov     qword, ptr, [rsp, +, 144], rdx
 mov     qword, ptr, [rsp, +, 16], rdx
 mov     qword, ptr, [rsp, +, 24], r15
 mov     qword, ptr, [rsp, +, 32], rbp
 xor     r14, r12
 lea     rdi, [rsp, +, 184]
 lea     rsi, [rsp, +, 96]
 lea     rdx, [rsp, +, 8]
 xor     ecx, ecx
 call    qword, ptr, [rip, +, _ZN6ethnum10intrinsics6native6divmod8udivmod417h2baa86b59d534686E@GOTPCREL]
 mov     rsi, qword, ptr, [rsp, +, 184]
 mov     rcx, qword, ptr, [rsp, +, 192]
 mov     qword, ptr, [rsp, +, 128], rcx
 xor     rcx, r14
 mov     qword, ptr, [rsp, +, 136], rsi
 xor     rsi, r14
 mov     rax, qword, ptr, [rsp, +, 208]
 xor     rax, r14
 mov     rdx, qword, ptr, [rsp, +, 200]
 xor     rdx, r14
 sub     rdx, r14
 sbb     rax, r14
 sub     rsi, r14
 mov     qword, ptr, [rsp, +, 56], rsi
 sbb     rcx, r14
 mov     qword, ptr, [rsp, +, 72], rcx
 mov     r14, qword, ptr, [rsp]
 sbb     rdx, 0
 mov     qword, ptr, [rsp, +, 64], rdx
 sbb     rax, 0
 mov     qword, ptr, [rsp, +, 80], rax
 pxor    xmm0, xmm0
 movdqu  xmmword, ptr, [rsp, +, 8], xmm0
 movdqu  xmmword, ptr, [rsp, +, 24], xmm0
 movdqu  xmm0, xmmword, ptr, [rsp, +, 8]
 movdqu  xmm1, xmmword, ptr, [r14]
 pcmpeqb xmm1, xmm0
 movdqu  xmm0, xmmword, ptr, [r14, +, 16]
 movdqu  xmm2, xmmword, ptr, [rsp, +, 24]
 pcmpeqb xmm2, xmm0
 pand    xmm2, xmm1
 pmovmskb eax, xmm2
 cmp     eax, 65535
 je      .LBB10_4
 mov     rax, qword, ptr, [rsp, +, 152]
 mov     qword, ptr, [rsp, +, 112], rax
 mov     rax, qword, ptr, [rsp, +, 160]
 mov     qword, ptr, [rsp, +, 104], rax
 mov     rax, qword, ptr, [rsp, +, 168]
 mov     qword, ptr, [rsp, +, 96], rax
 mov     qword, ptr, [rsp, +, 120], r13
 mov     qword, ptr, [rsp, +, 32], rbp
 mov     qword, ptr, [rsp, +, 24], r15
 mov     rax, qword, ptr, [rsp, +, 144]
 mov     qword, ptr, [rsp, +, 16], rax
 mov     qword, ptr, [rsp, +, 8], rbx
 lea     rdi, [rsp, +, 184]
 lea     rsi, [rsp, +, 96]
 lea     rdx, [rsp, +, 8]
 lea     rcx, [rsp, +, 216]
 call    qword, ptr, [rip, +, _ZN6ethnum10intrinsics6native6divmod8udivmod417h2baa86b59d534686E@GOTPCREL]
 mov     rax, qword, ptr, [rsp, +, 224]
 xor     rax, r12
 mov     rcx, qword, ptr, [rsp, +, 216]
 xor     rcx, r12
 mov     rdx, qword, ptr, [rsp, +, 240]
 xor     rdx, r12
 mov     rsi, qword, ptr, [rsp, +, 232]
 xor     rsi, r12
 add     rsi, qword, ptr, [rsp, +, 176]
 adc     rdx, 0
 cmp     rcx, r12
 sbb     rax, r12
 sbb     rsi, 0
 sbb     rdx, 0
 js      .LBB10_7
 mov     rax, qword, ptr, [rsp, +, 88]
 mov     rcx, qword, ptr, [rsp, +, 56]
 mov     qword, ptr, [rax], rcx
 mov     rcx, qword, ptr, [rsp, +, 72]
 mov     qword, ptr, [rax, +, 8], rcx
 mov     rcx, qword, ptr, [rsp, +, 64]
 mov     qword, ptr, [rax, +, 16], rcx
 mov     rcx, qword, ptr, [rsp, +, 80]
 jmp     .LBB10_11
.LBB10_7:
 movdqu  xmm0, xmmword, ptr, [r14]
 movdqu  xmm1, xmmword, ptr, [r14, +, 16]
 pshufd  xmm2, xmm1, 238
 movq    rax, xmm2
 pxor    xmm2, xmm2
 pcmpeqd xmm1, xmm2
 movmskps ecx, xmm1
 xor     ecx, 15
 pcmpeqd xmm0, xmm2
 movmskps edx, xmm0
 xor     esi, esi
 xor     edx, 15
 setne   sil
 xor     edx, edx
 cmp     rdx, qword, ptr, [r14, +, 16]
 sbb     rdx, rax
 setl    al
 test    ecx, ecx
 movzx   eax, al
 cmovne  esi, eax
 test    sil, sil
 mov     rax, qword, ptr, [rsp, +, 88]
 je      .LBB10_8
 mov     rdi, qword, ptr, [rsp, +, 56]
 add     rdi, -1
 mov     rdx, qword, ptr, [rsp, +, 72]
 adc     rdx, -1
 xor     ecx, ecx
 mov     rsi, qword, ptr, [rsp, +, 128]
 or      qword, ptr, [rsp, +, 136], rsi
 sete    cl
 mov     rsi, qword, ptr, [rsp, +, 64]
 sub     rsi, rcx
 mov     rcx, qword, ptr, [rsp, +, 80]
 sbb     rcx, 0
 jmp     .LBB10_10
.LBB10_8:
 mov     rdi, qword, ptr, [rsp, +, 56]
 add     rdi, 1
 mov     rdx, qword, ptr, [rsp, +, 72]
 adc     rdx, 0
 mov     rsi, qword, ptr, [rsp, +, 64]
 adc     rsi, 0
 mov     rcx, qword, ptr, [rsp, +, 80]
 adc     rcx, 0
.LBB10_10:
 mov     qword, ptr, [rax], rdi
 mov     qword, ptr, [rax, +, 8], rdx
 mov     qword, ptr, [rax, +, 16], rsi
.LBB10_11:
 mov     qword, ptr, [rax, +, 24], rcx
 add     rsp, 248
 pop     rbx
 pop     r12
 pop     r13
 pop     r14
 pop     r15
 pop     rbp
 ret
.LBB10_1:
 lea     rax, [rip, +, .L__unnamed_22]
 mov     qword, ptr, [rsp, +, 8], rax
 mov     qword, ptr, [rsp, +, 16], 1
 lea     rax, [rip, +, .L__unnamed_7]
 mov     qword, ptr, [rsp, +, 24], rax
 movups  xmmword, ptr, [rsp, +, 32], xmm3
 lea     rsi, [rip, +, .L__unnamed_23]
 lea     rdi, [rsp, +, 8]
 call    qword, ptr, [rip, +, _ZN4core9panicking9panic_fmt17hd1e971d8d7c78e0eE@GOTPCREL]
 ud2
.LBB10_4:
 lea     rax, [rip, +, .L__unnamed_6]
 mov     qword, ptr, [rsp, +, 8], rax
 mov     qword, ptr, [rsp, +, 16], 1
 lea     rax, [rip, +, .L__unnamed_7]
 mov     qword, ptr, [rsp, +, 24], rax
 pxor    xmm0, xmm0
 movdqu  xmmword, ptr, [rsp, +, 32], xmm0
 lea     rsi, [rip, +, .L__unnamed_23]
 lea     rdi, [rsp, +, 8]
 call    qword, ptr, [rip, +, _ZN4core9panicking9panic_fmt17hd1e971d8d7c78e0eE@GOTPCREL]
 ud2

nlordell · 2023-10-29T16:38:36Z

It also looks like it has two different possibilities for panicking, but I'm unsure what they are

I imagine it is for rhs = 0 (the two possibilities being the / and % which aren't being optimised into a single call as you observed).

nlordell added the good first issue Good for newcomers label Feb 22, 2022

NCGThompson linked a pull request Oct 28, 2023 that will close this issue

Add Stable Div Rem #32

Open

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add `{checked_,overflowing_,wrapping_,}div_rem` #11

Add `{checked_,overflowing_,wrapping_,}div_rem` #11

nlordell commented Feb 21, 2022

NCGThompson commented Oct 26, 2023

nlordell commented Oct 26, 2023

nlordell commented Oct 26, 2023 •

edited

Loading

NCGThompson commented Oct 27, 2023 •

edited

Loading

nlordell commented Oct 29, 2023

Add {checked_,overflowing_,wrapping_,}div_rem #11

Add {checked_,overflowing_,wrapping_,}div_rem #11

Comments

nlordell commented Feb 21, 2022

NCGThompson commented Oct 26, 2023

nlordell commented Oct 26, 2023

nlordell commented Oct 26, 2023 • edited Loading

NCGThompson commented Oct 27, 2023 • edited Loading

nlordell commented Oct 29, 2023

Add `{checked_,overflowing_,wrapping_,}div_rem` #11

Add `{checked_,overflowing_,wrapping_,}div_rem` #11

nlordell commented Oct 26, 2023 •

edited

Loading

NCGThompson commented Oct 27, 2023 •

edited

Loading