From 7d59741668a5cfce83df71b033d11ae53c334fe6 Mon Sep 17 00:00:00 2001
From: Remoun Metyas
Date: Fri, 11 Mar 2022 15:07:35 -0800
Subject: [PATCH] Upgrade rust version (#16)

* Update rust-toolchain version.

* Replace llvm_asm with stabilized asm macro.

* Update asm comments

* Remove unused helper.

* Bump package versions.
---
 Cargo.lock                        |  10 +-
 aligned-cmov/Cargo.toml           |   2 +-
 aligned-cmov/src/cmov_impl_asm.rs | 222 ++++++++++++++----------------
 aligned-cmov/src/lib.rs           |   1 -
 balanced-tree-index/Cargo.toml    |   4 +-
 mc-oblivious-map/Cargo.toml       |   4 +-
 mc-oblivious-ram/Cargo.toml       |   4 +-
 mc-oblivious-ram/src/lib.rs       |  11 +-
 mc-oblivious-traits/Cargo.toml    |   6 +-
 no-asm-tests/Cargo.lock           |  10 +-
 no-asm-tests/Cargo.toml           |   6 +-
 rust-toolchain                    |   2 +-
 12 files changed, 129 insertions(+), 153 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index cadb2e9..481214c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -14,7 +14,7 @@ dependencies = [
 
 [[package]]
 name = "aligned-cmov"
-version = "2.0.0"
+version = "2.1.0"
 dependencies = [
  "aligned-array",
  "criterion",
@@ -40,7 +40,7 @@ checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
 
 [[package]]
 name = "balanced-tree-index"
-version = "2.0.0"
+version = "2.1.0"
 dependencies = [
  "aligned-cmov",
  "rand_core",
@@ -306,7 +306,7 @@ dependencies = [
 
 [[package]]
 name = "mc-oblivious-map"
-version = "2.0.0"
+version = "2.1.0"
 dependencies = [
  "aligned-array",
  "aligned-cmov",
@@ -322,7 +322,7 @@ dependencies = [
 
 [[package]]
 name = "mc-oblivious-ram"
-version = "2.0.0"
+version = "2.1.0"
 dependencies = [
  "aligned-cmov",
  "balanced-tree-index",
@@ -333,7 +333,7 @@ dependencies = [
 
 [[package]]
 name = "mc-oblivious-traits"
-version = "2.0.0"
+version = "2.1.0"
 dependencies = [
  "aligned-cmov",
  "balanced-tree-index",
diff --git a/aligned-cmov/Cargo.toml b/aligned-cmov/Cargo.toml
index 3b917fa..d79e480 100644
--- a/aligned-cmov/Cargo.toml
+++ b/aligned-cmov/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "aligned-cmov"
-version = "2.0.0"
+version = "2.1.0"
 description = "Fast constant-time conditional moves of aligned bytes"
 authors = ["MobileCoin"]
 license = "GPL-3.0"
diff --git a/aligned-cmov/src/cmov_impl_asm.rs b/aligned-cmov/src/cmov_impl_asm.rs
index 4638eda..017ab11 100644
--- a/aligned-cmov/src/cmov_impl_asm.rs
+++ b/aligned-cmov/src/cmov_impl_asm.rs
@@ -1,4 +1,4 @@
-// Copyright (c) 2018-2021 The MobileCoin Foundation
+// Copyright (c) 2018-2022 The MobileCoin Foundation
 
 //! Implementation of cmov on x86-64 using inline assembly.
 //!
@@ -19,45 +19,41 @@
 //! For now it seems simplest to use inline assembly for all of it.
 
 use super::{A64Bytes, A8Bytes, ArrayLength};
+use core::arch::asm;
 
 // CMov for u32 values
#[inline]
 pub fn cmov_u32(condition: bool, src: &u32, dest: &mut u32) {
-    // Create a temporary to be assigned to a register for cmov
-    let mut temp: u32 = *dest;
-    // Test condition ($1)
-    // cmovnz from src into temp ($2 to $0)
     unsafe {
-        llvm_asm!("test $1, $1
-                   cmovnz $0, $2"
-                  :"+&r"(temp)
-                  : "r"(condition), "rm"(*src)
-                  : "cc"
-                  : "volatile", "intel");
+        asm!(
+            // Set ZF if cond==0
+            "test {0}, {0}",
+            // Conditionally move src into dest (based on ZF)
+            // `:e` formats to the 32-bit register.
+            "cmovnz {2:e}, {1:e}",
+            in(reg_byte) condition as u8,
+            in(reg) *src,
+            // inout since we might not write, so we need the existing value.
+            inout(reg) *dest,
+        );
     }
-    // "cc" is because we are setting flags in test
-    // "volatile" is meant to discourage the optimizer from inspecting this
-    *dest = temp;
 }
 
 // CMov for u64 values
 #[inline]
 pub fn cmov_u64(condition: bool, src: &u64, dest: &mut u64) {
-    // Create a temporary to be assigned to a register for cmov
-    let mut temp: u64 = *dest;
-    // Test condition ($1)
-    // cmovnz from src into temp ($2 to $0)
     unsafe {
-        llvm_asm!("test $1, $1
-                   cmovnz $0, $2"
-                  :"+&r"(temp)
-                  : "r"(condition), "rm"(*src)
-                  : "cc"
-                  : "volatile", "intel");
+        asm!(
+            // Set ZF if cond==0
+            "test {0}, {0}",
+            // Conditionally move src into dest (based on ZF)
+            "cmovnz {2}, {1}",
+            in(reg_byte) condition as u8,
+            in(reg) *src,
+            // inout since we might not write, so we need the existing value.
+            inout(reg) *dest,
+        );
     }
-    // "cc" is because we are setting flags in test
-    // "volatile" is meant to discourage the optimizer from inspecting this
-    *dest = temp;
 }
 
 // CMov for i32 values
@@ -124,7 +120,6 @@ pub fn cmov_a64_bytes<N: ArrayLength<u8>>(
 // If we have avx2 then we can use cmov_byte_slice_a64 implementation
 #[cfg(target_feature = "avx2")]
 #[inline]
-#[allow(unused)]
 pub fn cmov_a64_bytes<N: ArrayLength<u8>>(
     condition: bool,
     src: &A64Bytes<N>,
     dest: &mut A64Bytes<N>,
@@ -151,13 +146,8 @@ pub fn cmov_a64_bytes<N: ArrayLength<u8>>(
 // if condition { memcpy(dest, src, count * 8) }
 // for pointers aligned to 8 byte boundary. Assumes count > 0
 #[inline]
-#[allow(unused)]
-unsafe fn cmov_byte_slice_a8(condition: bool, src: *const u64, dest: *mut u64, mut count: usize) {
-    // Create a temporary to be assigned to a register for cmov
-    // We also use it to pass the condition in, which is only needed in a register
-    // before entering the loop.
-    let mut temp: u64 = condition as u64;
-
+unsafe fn cmov_byte_slice_a8(condition: bool, src: *const u64, dest: *mut u64, count: usize) {
+    debug_assert!(count > 0, "count cannot be 0");
     // The idea here is to test once before we enter the loop and reuse the test
     // result for every cmov.
     // The loop is a dec, jnz loop.
@@ -167,109 +157,105 @@ unsafe fn cmov_byte_slice_a8(condition: bool, src: *const u64, dest: *mut u64, m
     // indexing. Because dec will clobber ZF, we can't use cmovnz. We store
     // the result of outer test in CF which is not clobbered by dec.
     // cmovc is contingent on CF
-    // negb is used to set CF to 1 iff the condition value was 1.
+    // neg is used to set CF to 1 iff the condition value was 1.
     // Pity, we cannot cmov directly to memory
     //
-    // temp is a "register output" because this is the way to obtain a scratch
-    // register when we don't care what actual register is used and we want the
-    // compiler to pick.
-    //
     // count is modified by the assembly -- for this reason it has to be labelled
-    // as an input and an output, otherwise its illegal to modify it.
+    // as an input and an output, otherwise it's illegal to modify it.
     //
-    // The condition is passed in through temp, then neg moves the value to CF.
-    // After that we don't need condition in a register, so temp register can be
+    // The condition is passed in through cond, then neg moves the value to CF.
+    // After that we don't need condition in a register, so the cond register can be
     // reused.
-    llvm_asm!("neg $0
-               loop_body_${:uid}:
-               mov $0, [$3 + 8*$1 - 8]
-               cmovc $0, [$2 + 8*$1 - 8]
-               mov [$3 + 8*$1 - 8], $0
-               dec $1
-               jnz loop_body_${:uid}"
-              : "+&r"(temp), "+&r"(count)
-              : "r"(src), "r"(dest)
-              : "cc", "memory"
-              : "volatile", "intel");
-    // cc is because we are setting flags in test
-    // memory is because we are dereferencing a bunch of pointers in asm
-    // volatile is because the output variable "temp" is not the true output
-    // of this block, the memory side-effects are.
+    asm!(
+        // Sets CF=0 iff cond==0, CF=1 otherwise.
+        "neg {0}",
+        // Must use numeric local labels, since this block may be inlined multiple times.
+        // https://doc.rust-lang.org/nightly/rust-by-example/unsafe/asm.html#labels
+        "42:",
+        // Copy into temp register from dest (the no-op default when the cmov below does not fire).
+        "mov {0}, [{3} + 8*{1} - 8]",
+        // Conditionally copy into temp register from src (based on CF).
+        "cmovc {0}, [{2} + 8*{1} - 8]",
+        // Copy from temp register into dest.
+        "mov [{3} + 8*{1} - 8], {0}",
+        // Decrement count. Sets ZF=1 when we reach 0.
+        "dec {1}",
+        // If ZF is not set, loop.
+        "jnz 42b",
+        // Discard outputs; the memory side effects are what's desired.
+        inout(reg) (condition as u64) => _,
+        inout(reg) count => _,
+        in(reg) src,
+        in(reg) dest,
+    );
 }
 
 // Should be a constant time function equivalent to:
 // if condition { memcpy(dest, src, num_bytes) }
 // for pointers aligned to *64* byte boundary. Will fault if that is not the
-// case. Assumes num_bytes > 0, and num_bytes divisible by 64!
+// case. Requires num_bytes > 0, and num_bytes divisible by 64.
 // This version uses AVX2 256-bit moves
 #[cfg(target_feature = "avx2")]
 #[inline]
-#[allow(unused)]
 unsafe fn cmov_byte_slice_a64(condition: bool, src: *const u64, dest: *mut u64, num_bytes: usize) {
-    debug_assert!(
-        num_bytes > 0,
-        "num_bytes cannot be 0, caller must handle that"
-    );
-    debug_assert!(
-        num_bytes % 64 == 0,
-        "num_bytes must be divisible by 64, caller must handle that"
-    );
-    // Similarly as before, we want to test once and use the test result for the
-    // whole loop.
-    //
-    // Before we enter the loop, we want to set ymm1 to all 0s or all 1s, depending
-    // on condition. We use neg to make it all 0s or all 1s 64bit, then vmovq to
-    // move that to xmm2, then vbroadcastsd to fill ymm1 with 1s or zeros.
-    //
-    // This time the cmov mechanism is:
-    // - VMOVDQA to load the source into a ymm register,
-    // - VPMASKMOVQ to move that to memory, after masking it with ymm1.
-    // An alternative approach which I didn't test is, MASKMOV to another ymm
-    // register, then move that register to memory.
-    //
+    debug_assert!(num_bytes > 0, "num_bytes cannot be 0");
+    debug_assert!(num_bytes % 64 == 0, "num_bytes must be divisible by 64");
+
     // Notes:
-    // temp = $0 is the scratch register
-    // We aren't allowed to modify condition or num_bytes = $3 unless
-    // we annotate them appropriately as outputs.
-    // So, num_bytes is an in/out register, and we pass condition in via
-    // the scratch register, instead of a dedicated register.
+    // temp = {0} is the scratch register
     //
-    // Once we have the mask in ymm1 we don't need the condition in a register
-    // anymore. Then, $0 is the loop variable, counting down from num_bytes in
-    // steps of 64
-    //
-    // The values of $0 in the loop are num_bytes, num_bytes - 64, ... 64,
+    // The values of {0} in the loop are num_bytes, num_bytes - 64, ... 64,
     // rather than num_bytes - 64 ... 0. This is because the semantics of the loop
     // are subtract 64, then test for 0, rather than test for 0, then subtract.
-    // So when we index using $0, we subtract 64 to compensate.
-    //
-    // We unroll the loop once because we can assume 64 byte alignment, but ymm
-    // register holds 32 bytes. So one round of vmovdqa, vpmaskmovq moves 32 bytes.
-    // So we do this twice in one pass through the loop.
-    //
-    // TODO: In AVX512 zmm registers we could move 64 bytes at once...
+    // So when we index using {0}, we subtract 64 to compensate.
     // TODO: Does unrolling the loop more help?
-    let mut temp: u64 = condition as u64;
-
-    llvm_asm!("neg $0
-               vmovq xmm2, $0
-               vbroadcastsd ymm1, xmm2
-               mov $0, $3
-               loop_body2_${:uid}:
-               vmovdqa ymm2, [$1 + $0 - 64]
-               vpmaskmovq [$2 + $0 - 64], ymm1, ymm2
-               vmovdqa ymm3, [$1 + $0 - 32]
-               vpmaskmovq [$2 + $0 - 32], ymm1, ymm3
-               sub $0, 64
-               jnz loop_body2_${:uid}"
-              : "+&r"(temp)
-              : "r"(src), "r"(dest), "rmi"(num_bytes)
-              : "cc", "memory", "ymm1", "ymm2", "ymm3"
-              : "volatile", "intel");
-    // cc is because we are setting flags
-    // memory is because we are dereferencing a bunch of pointers in asm
-    // volatile is because the output variable "temp" is not the true output
-    // of this block, the memory side-effects are.
+    asm!(
+        // Similarly to cmov_byte_slice_a8, we want to test once and use the
+        // result for the whole loop.
+        // Before we enter the loop, we want to set ymm1 to all 0s or all 1s,
+        // depending on condition. We use neg to make all 64 bits 0s or all 1s
+        // (since neg(0) = 0 and neg(1) = -1, which is all ones in two's complement),
+        // then vmovq to move that to xmm2, then vbroadcastsd to fill ymm1
+        // with ones or zeros.
+        "neg {0}",
+        "vmovq xmm2, {0}",
+        "vbroadcastsd ymm1, xmm2",
+        // Once we have the mask in ymm1 we don't need the condition in
+        // a register anymore. Then, {0} is the loop variable, counting down
+        // from num_bytes in steps of 64.
+        "mov {0}, {3}",
+        // Must use numeric local labels, since this block may be inlined multiple times.
+        // https://doc.rust-lang.org/nightly/rust-by-example/unsafe/asm.html#labels
+        "42:",
+        // This time the cmov mechanism is:
+        // - VMOVDQA to load the source into a ymm register,
+        // - VPMASKMOVQ to move that to memory, after masking it with ymm1.
+        // An alternative approach, which I didn't test, is MASKMOV to
+        // another ymm register, then moving that register to memory.
+        "vmovdqa ymm2, [{1} + {0} - 64]",
+        "vpmaskmovq [{2} + {0} - 64], ymm1, ymm2",
+        // We unroll the loop once because we can assume 64-byte alignment,
+        // but a ymm register holds only 32 bytes. So one round of
+        // vmovdqa+vpmaskmovq moves 32 bytes,
+        // and we do this twice in one pass through the loop.
+        //
+        // TODO: In AVX512 zmm registers we could move 64 bytes at once...
+        "vmovdqa ymm3, [{1} + {0} - 32]",
+        "vpmaskmovq [{2} + {0} - 32], ymm1, ymm3",
+        // Decrement num_bytes. Sets ZF=1 when we reach 0.
+        "sub {0}, 64",
+        // If ZF is not set, loop.
+        "jnz 42b",
+        // Discard output; the memory side effects are what's desired.
+        inout(reg) condition as u64 => _,
+        in(reg) src,
+        in(reg) dest,
+        in(reg) num_bytes,
+        // Scratch/temp registers.
+        out("ymm1") _,
+        out("ymm2") _,
+        out("ymm3") _,
+    );
 }
 
 // TODO: In avx512 there is vmovdqa which takes a mask register, and kmovq to
diff --git a/aligned-cmov/src/lib.rs b/aligned-cmov/src/lib.rs
index b9bb478..6ef3c61 100644
--- a/aligned-cmov/src/lib.rs
+++ b/aligned-cmov/src/lib.rs
@@ -1,7 +1,6 @@
 // Copyright (c) 2018-2021 The MobileCoin Foundation
 
 #![no_std]
-#![feature(llvm_asm)]
 
 pub use aligned_array::{subtle, Aligned, AsAlignedChunks, AsNeSlice, A64, A8};
 pub use generic_array::{arr, typenum, ArrayLength, GenericArray};
diff --git a/balanced-tree-index/Cargo.toml b/balanced-tree-index/Cargo.toml
index ea5731a..018c781 100644
--- a/balanced-tree-index/Cargo.toml
+++ b/balanced-tree-index/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "balanced-tree-index"
-version = "2.0.0"
+version = "2.1.0"
 description = "Utilities for constant-time manipulation of a complete binary tree with a flat in-memory representation."
 authors = ["MobileCoin"]
 license = "GPL-3.0"
@@ -11,7 +11,7 @@ keywords = ["binary-tree", "constant-time", "utilities", "no_std"]
 categories = ["cryptography", "data-structures"]
 
 [dependencies]
-aligned-cmov = { path = "../aligned-cmov", version = "2" }
+aligned-cmov = { path = "../aligned-cmov", version = "2.1" }
 
 rand_core = { version = "0.6", default-features = false }
 
diff --git a/mc-oblivious-map/Cargo.toml b/mc-oblivious-map/Cargo.toml
index 7834799..9593d44 100644
--- a/mc-oblivious-map/Cargo.toml
+++ b/mc-oblivious-map/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "mc-oblivious-map"
-version = "2.0.0"
+version = "2.1.0"
 description = "Implementation of Oblivious Hash Map data structures on top of Oblivious RAM"
 authors = ["MobileCoin"]
 license = "GPL-3.0"
@@ -15,7 +15,7 @@ no_asm_insecure = ["aligned-cmov/no_asm_insecure"]
 
 [dependencies]
 aligned-array = { version = "1", features = ["subtle"] }
-aligned-cmov = { path = "../aligned-cmov", version = "2" }
+aligned-cmov = { path = "../aligned-cmov", version = "2.1" }
 mc-oblivious-traits = { path = "../mc-oblivious-traits", version = "2" }
 
 generic-array = { version = "0.14", default-features = false }
diff --git a/mc-oblivious-ram/Cargo.toml b/mc-oblivious-ram/Cargo.toml
index b23c385..6d2412f 100644
--- a/mc-oblivious-ram/Cargo.toml
+++ b/mc-oblivious-ram/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "mc-oblivious-ram"
-version = "2.0.0"
+version = "2.1.0"
 description = "Implementations of Oblivious RAM data structures"
 authors = ["MobileCoin"]
 license = "GPL-3.0"
@@ -14,7 +14,7 @@ categories = ["cryptography", "data-structures", "no-std"]
 no_asm_insecure = ["aligned-cmov/no_asm_insecure"]
 
 [dependencies]
-aligned-cmov = { path = "../aligned-cmov", version = "2" }
+aligned-cmov = { path = "../aligned-cmov", version = "2.1" }
 balanced-tree-index = { path = "../balanced-tree-index", version = "2" }
 mc-oblivious-traits = { path = "../mc-oblivious-traits", version = "2" }
 
diff --git a/mc-oblivious-ram/src/lib.rs b/mc-oblivious-ram/src/lib.rs
index 1abc378..9d44ebe 100644
--- a/mc-oblivious-ram/src/lib.rs
+++ b/mc-oblivious-ram/src/lib.rs
@@ -96,22 +96,13 @@ where
 mod testing {
     use super::*;
 
-    use aligned_cmov::{A64Bytes, A8Bytes, ArrayLength};
+    use aligned_cmov::{A64Bytes, ArrayLength};
     use mc_oblivious_traits::{rng_maker, testing, HeapORAMStorageCreator, ORAM};
     use test_helper::{run_with_several_seeds, RngType};
 
     const STASH_SIZE: usize = 16;
 
     // Helper to make tests more succinct
-    #[allow(unused)]
-    fn a8_bytes<N: ArrayLength<u8>>(src: u8) -> A8Bytes<N> {
-        let mut result = A8Bytes::<N>::default();
-        for byte in result.iter_mut() {
-            *byte = src;
-        }
-        result
-    }
-
     fn a64_bytes<N: ArrayLength<u8>>(src: u8) -> A64Bytes<N> {
         let mut result = A64Bytes::<N>::default();
         for byte in result.iter_mut() {
diff --git a/mc-oblivious-traits/Cargo.toml b/mc-oblivious-traits/Cargo.toml
index 8362d3a..9d57b2d 100644
--- a/mc-oblivious-traits/Cargo.toml
+++ b/mc-oblivious-traits/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "mc-oblivious-traits"
-version = "2.0.0"
+version = "2.1.0"
 description = "Traits and interfaces for components related to Oblivious data structures"
 authors = ["MobileCoin"]
 license = "GPL-3.0"
@@ -14,7 +14,7 @@ categories = ["cryptography", "data-structures", "no-std"]
 no_asm_insecure = ["aligned-cmov/no_asm_insecure"]
 
 [dependencies]
-aligned-cmov = { path = "../aligned-cmov", version = "2" }
-balanced-tree-index = { path = "../balanced-tree-index", version = "2" }
+aligned-cmov = { path = "../aligned-cmov", version = "2.1" }
+balanced-tree-index = { path = "../balanced-tree-index", version = "2.1" }
 
 rand_core = { version = "0.6", default-features = false }
 
diff --git a/no-asm-tests/Cargo.lock b/no-asm-tests/Cargo.lock
index 8c94f54..a2d40c1 100644
--- a/no-asm-tests/Cargo.lock
+++ b/no-asm-tests/Cargo.lock
@@ -14,7 +14,7 @@ dependencies = [
 
 [[package]]
 name = "aligned-cmov"
-version = "2.0.0"
+version = "2.1.0"
 dependencies = [
  "aligned-array",
  "generic-array",
@@ -22,7 +22,7 @@ dependencies = [
 
 [[package]]
 name = "balanced-tree-index"
-version = "2.0.0"
+version = "2.1.0"
 dependencies = [
  "aligned-cmov",
  "rand_core",
@@ -40,7 +40,7 @@ dependencies = [
 
 [[package]]
 name = "mc-oblivious-ram"
-version = "2.0.0"
+version = "2.1.0"
 dependencies = [
  "aligned-cmov",
  "balanced-tree-index",
@@ -50,7 +50,7 @@ dependencies = [
 
 [[package]]
 name = "mc-oblivious-traits"
-version = "2.0.0"
+version = "2.1.0"
 dependencies = [
  "aligned-cmov",
  "balanced-tree-index",
@@ -59,7 +59,7 @@ dependencies = [
 
 [[package]]
 name = "no-asm-tests"
-version = "2.0.0"
+version = "2.1.0"
 dependencies = [
  "aligned-cmov",
  "balanced-tree-index",
diff --git a/no-asm-tests/Cargo.toml b/no-asm-tests/Cargo.toml
index d5f3508..e1da716 100644
--- a/no-asm-tests/Cargo.toml
+++ b/no-asm-tests/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "no-asm-tests"
-version = "2.0.0"
+version = "2.1.0"
 description = "A crate in another workspace which sets the no_asm_insecure flag and runs tests that way"
 authors = ["MobileCoin"]
 edition = "2018"
@@ -9,6 +9,6 @@ readme = "README.md"
 [dependencies]
 aligned-cmov = { path = "../aligned-cmov", features = ["no_asm_insecure"] }
 balanced-tree-index = { path = "../balanced-tree-index" }
-mc-oblivious-traits = { path = "../mc-oblivious-traits" }
-mc-oblivious-ram = { path = "../mc-oblivious-ram" }
+mc-oblivious-traits = { path = "../mc-oblivious-traits", features = ["no_asm_insecure"] }
+mc-oblivious-ram = { path = "../mc-oblivious-ram", features = ["no_asm_insecure"] }
 test-helper = { path = "../test-helper" }
diff --git a/rust-toolchain b/rust-toolchain
index 3a29ac5..a9c709b 100644
--- a/rust-toolchain
+++ b/rust-toolchain
@@ -1 +1 @@
-nightly-2021-07-21
+nightly-2022-02-22
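
Reviewer notes (illustrative sketches; not part of the patch):

Every cmov_* entry point rewritten above is meant to be observationally
equivalent to `if condition { *dest = *src }`, just without a data-dependent
branch. A minimal check of that contract, written as a unit test one could
drop into cmov_impl_asm.rs (hypothetical; this patch does not add it):

    #[test]
    fn cmov_u64_acts_like_conditional_assignment() {
        let src: u64 = 0xdead_beef;
        let mut dest: u64 = 7;

        // condition == false: dest must be left unchanged.
        cmov_u64(false, &src, &mut dest);
        assert_eq!(dest, 7);

        // condition == true: dest must now equal src.
        cmov_u64(true, &src, &mut dest);
        assert_eq!(dest, 0xdead_beef);
    }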
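
The asm loops are easiest to review against their plain-Rust meaning. For
cmov_byte_slice_a8, under the same preconditions (8-byte-aligned pointers,
count > 0), the intended semantics are a conditional word-by-word copy. A
reference version (hypothetical name; NOT constant-time -- the `if` below is
exactly the data-dependent branch the mov/cmovc/mov loop exists to avoid):

    unsafe fn cmov_byte_slice_a8_reference(
        condition: bool,
        src: *const u64,
        dest: *mut u64,
        count: usize,
    ) {
        if condition {
            for i in 0..count {
                // Same effect as one mov/cmovc/mov round of the asm loop.
                *dest.add(i) = *src.add(i);
            }
        }
    }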
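
Finally, the neg/vmovq/vbroadcastsd prologue in cmov_byte_slice_a64 only
builds an all-zeros or all-ones mask out of the condition. The same trick in
scalar Rust (a sketch of the idea; a compiler is free to lower source-level
masking like this back into a branch, which is why the crate keeps the real
implementation in asm):

    fn mask_from_condition(condition: bool) -> u64 {
        // 0u64.wrapping_neg() == 0 and 1u64.wrapping_neg() == u64::MAX
        // (all ones in two's complement) -- the scalar analogue of `neg {0}`.
        (condition as u64).wrapping_neg()
    }

    fn cmov_u64_masked(condition: bool, src: u64, dest: &mut u64) {
        // Keep src where the mask is all ones, keep dest where it is zero;
        // this mirrors what vpmaskmovq does per 64-bit lane.
        let mask = mask_from_condition(condition);
        *dest = (src & mask) | (*dest & !mask);
    }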