Compiler output analysis of x mod 16 Vs. x & 15

Why are They the Same?

When dealing with powers of 2, specifically for modulo operations, the modulo operation a % n (where n is a power of 2) can be replaced with a bitwise AND operation a & (n-1). This is because the last log2(n) bits of the binary representation of a number determine the remainder when divided by n.

Comparing Implementations

Let's examine the following implementations in C and Rust.

C code

#include <stdio.h>
int mod(int a) {
    return a % 16;
}

int bitwise_and(int a) {
    return a & 15;
}

Rust code

#[no_mangle]
pub fn moded(num: i32) -> i32 { // mod is a rust keyword
    num % 16
}

#[no_mangle]
pub fn anded(num: i32) -> i32 {
    num & 15
}

Compiler output without optimization

C output (-O0 flag)

mod:
    push    rbp                         ; Save the base pointer (rbp) on the stack
    mov     rbp, rsp                    ; Set the base pointer to the current stack pointer (rsp)
    mov     DWORD PTR [rbp-4], edi      ; Store the input value (edi) at the memory location [rbp-4]
    mov     edx, DWORD PTR [rbp-4]      ; Move the input value from memory to the edx register
    mov     eax, edx                    ; Copy the value from edx to eax
    sar     eax, 31                     ; Arithmetic right shift eax by 31 bits to extend the sign bit
    shr     eax, 28                     ; Logical right shift eax by 28 bits, shifting the sign bit into the lower bits
    add     edx, eax                    ; Add the shifted sign bit to edx
    and     edx, 15                     ; Perform bitwise AND with 15 (0xF) to compute edx % 16
    sub     edx, eax                    ; Subtract the shifted sign bit from edx
    mov     eax, edx                    ; Move the result from edx to eax
    pop     rbp                         ; Restore the base pointer (rbp) from the stack
    ret                                 ; Return from the function

bitwise_and:
    push    rbp                         ; Save the base pointer (rbp) on the stack
    mov     rbp, rsp                    ; Set the base pointer to the current stack pointer (rsp)
    mov     DWORD PTR [rbp-4], edi      ; Store the input value (edi) at the memory location [rbp-4]
    mov     eax, DWORD PTR [rbp-4]      ; Move the input value from memory to the eax register
    and     eax, 15                     ; Perform bitwise AND with 15 (0xF) to compute eax % 16
    pop     rbp                         ; Restore the base pointer (rbp) from the stack
    ret                                 ; Return from the function

Rust output (opt-level=0 flag)

moded:
    push    rax                         ; Save the rax register
    mov     dword ptr [rsp + 4], edi    ; Move the value of edi to the stack
    cmp     edi, -2147483648            ; Compare edi with -2147483648 (minimum 32-bit integer value)
    sete    al                          ; Set al to 1 if edi is -2147483648, else set to 0
    and     al, 0                       ; Perform bitwise AND with 0, which sets al to 0
    test    al, 1                       ; Test if al is 1 (always false since al is 0)
    jne     .LBB0_2                     ; Jump to .LBB0_2 if the zero flag is not set (never happens)
    mov     eax, dword ptr [rsp + 4]    ; Move the value from the stack to eax
    mov     ecx, 16                     ; Move 16 into ecx
    cdq                                 ; Sign extend eax into edx:eax
    idiv    ecx                         ; Signed division of edx:eax by ecx (16)
    mov     eax, edx                    ; Move the remainder from edx to eax
    pop     rcx                         ; Restore the rcx register
    ret                                 ; Return from the function

.LBB0_2:
    lea     rdi, [rip + .L__unnamed_1]  ; Load the address of the panic message into rdi
    mov     rax, qword ptr [rip + core::panicking::panic_const::panic_const_rem_overflow::hc3ec482d3b45d4ab@GOTPCREL]
                                        ; Load the address of the panic function into rax
    call    rax                         ; Call the panic function


anded:
	mov     eax, edi
	and     eax, 15
	ret

Both functions use the and operation with 15, but the mod function has additional operations to handle the sign of the integer and potential overflow errors.

Optimized build output

Optimized C code with -O3 flag

mod:
    mov     edx, edi                   ; Move the input value (edi) to edx
    sar     edx, 31                    ; Arithmetic right shift edx by 31 bits to propagate the sign bit
    shr     edx, 28                    ; Logical right shift edx by 28 bits, shifting the sign bit into the lower nibble
    lea     eax, [rdi+rdx]             ; Load effective address: add the shifted sign bit (rdx) to the input value (rdi) and store in eax
    and     eax, 15                    ; Perform bitwise AND with 15 (0xF) to compute eax % 16
    sub     eax, edx                   ; Subtract the adjusted sign bit from eax
    ret                                ; Return from the function

bitwise_and:
    mov     eax, edi                   ; Move the input value (edi) to eax
    and     eax, 15                    ; Perform bitwise AND with 15 (0xF) to compute eax % 16
    ret                                ; Return from the function

Optimized Rust code with opt-level=3 flag

moded:
    mov     eax, edi                  ; Move the input value (edi) to eax
    lea     ecx, [rax + 15]           ; Load effective address: add 15 to the input value (rax) and store in ecx
    test    edi, edi                  ; Test if the input value (edi) is zero
    cmovns  ecx, edi                  ; Conditional move: if edi is non-negative, move edi to ecx
    and     ecx, -16                  ; Perform bitwise AND with -16 (0xFFFFFFF0) to clear the lower 4 bits of ecx
    sub     eax, ecx                  ; Subtract the adjusted value (ecx) from eax
    ret                               ; Return from the function

anded:
    mov     eax, edi                  ; Move the input value (edi) to eax
    and     eax, 15                   ; Perform bitwise AND with 15 (0xF) to compute eax % 16
    ret                               ; Return from the function

The compilers have optimized register usage, but the sign is still there

Builds with unsigned integers

Updated C code

#include <stdio.h>
unsigned int mod(unsigned int a) {
    return a % 16;
}

unsigned int bitwise_and(unsigned int a) {
    return a & 15;
}

Updated Rust code

#[no_mangle]
pub fn moded(num: u32) -> u32 { // mod is a rust keyword
    num % 16
}

#[no_mangle]
pub fn anded(num: u32) -> u32 {
    num & 15
}

And now for the moment of truth let,s see the assembly outputs,

For C Code

-O0 flag

mod:
	push    rbp
	mov     rbp, rsp
	mov     DWORD PTR [rbp-4], edi
	mov     eax, DWORD PTR [rbp-4]
	and     eax, 15
	pop     rbp
	ret
bitwise_and:
	push    rbp
	mov     rbp, rsp
	mov     DWORD PTR [rbp-4], edi
	mov     eax, DWORD PTR [rbp-4]
	and     eax, 15
	pop     rbp
	ret

-O3 flag

mod:
	mov     eax, edi
	and     eax, 15
	ret
bitwise_and:
	mov     eax, edi
	and     eax, 15
	ret

For Rust Code

opt-level=0

moded:
	mov     eax, edi
	and     eax, 15
	ret

anded:
	mov     eax, edi
	and     eax, 15
	ret

The compiler has successfully made the mod to and optimization.

opt-level=3

anded:
	mov     eax, edi
	and     eax, 15
	ret

Here, the Rust compiler has replaced the moded function with anded everywhere due to their identical implementations.

Conclusion

Using bitwise operations instead of modulo for powers of 2 is efficient, and often the compiler handles these optimizations automatically. However, understanding when to use them and how they work can help in writing more efficient code.