From 33cc1aca739ceb838bdc23921885f936df17165f Mon Sep 17 00:00:00 2001 From: Christopher Patton Date: Wed, 8 Nov 2023 18:04:04 +0100 Subject: [PATCH] Replace SHAKE128 with TurboSHAKE128 The reference code uses the reference implementation of TurboSHAKE128. This code is unoptimized, so care is needed to ensure our tests run in a reasonable amount of time. Each time `XofTurboShake128` is constructed we call `TurboSHAKE128()` once and fill a buffer with the output stream. The size of the buffer is a constant, `MAX_XOF_OUT_STREAM_BYTES`, chosen to be sufficiently long for every test that we have. So that we don't have to make this value too large, some of the tests in `vdaf_poplar1.py` have been modified. --- .gitmodules | 3 + draft-irtf-cfrg-vdaf.md | 98 ++++++++++++++---------------- poc/daf.py | 12 ++-- poc/draft-irtf-cfrg-kangarootwelve | 1 + poc/vdaf_poplar1.py | 22 +++---- poc/vdaf_prio3.py | 28 ++++----- poc/xof.py | 54 ++++++++-------- 7 files changed, 110 insertions(+), 108 deletions(-) create mode 100644 .gitmodules create mode 160000 poc/draft-irtf-cfrg-kangarootwelve diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..5d26f5a0 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "poc/draft-irtf-cfrg-kangarootwelve"] + path = poc/draft-irtf-cfrg-kangarootwelve + url = https://github.com/cfrg/draft-irtf-cfrg-kangarootwelve diff --git a/draft-irtf-cfrg-vdaf.md b/draft-irtf-cfrg-vdaf.md index 8ee26929..a9c7e635 100644 --- a/draft-irtf-cfrg-vdaf.md +++ b/draft-irtf-cfrg-vdaf.md @@ -31,13 +31,6 @@ author: organization: Google email: schoppmann@google.com -normative: - - FIPS202: - title: "SHA-3 Standard: Permutation-Based Hash and Extendable-Output Functions" - date: August 2015 - seriesinfo: NIST FIPS PUB 202 - informative: AGJOP21: @@ -1831,51 +1824,50 @@ def expand_into_vec(Xof, ~~~ {: #xof-derived-methods title="Derived methods for XOFs."} -### XofShake128 {#xof-shake128} +### XofTurboShake128 {#xof-turboshake128} -This section 
describes XofShake128, a XOF based on the SHAKE128 mode of -operation for the Keccak permutation {{FIPS202}}. This XOF is RECOMMENDED for -all use cases within VDAFs. The length of the domain separation string `dst` -passed to XofShake128 MUST NOT exceed 255 bytes. +This section describes XofTurboShake128, an XOF based on the +TurboSHAKE128 {{!TurboSHAKE=I-D.draft-irtf-cfrg-kangarootwelve}}. This +XOF is RECOMMENDED for all use cases within VDAFs. The length of the +domain separation string `dst` passed to XofTurboShake128 MUST NOT +exceed 255 bytes. ~~~ -class XofShake128(Xof): - """XOF based on SHA-3 (SHAKE128).""" +class XofTurboShake128(Xof): + """XOF wrapper for TurboSHAKE128.""" # Associated parameters SEED_SIZE = 16 def __init__(self, seed, dst, binder): self.l = 0 - self.x = seed + binder - self.s = dst + self.m = to_le_bytes(len(dst), 1) + dst + seed + binder def next(self, length: Unsigned) -> Bytes: self.l += length - # Function `SHAKE128(x, l)` is as defined in - # [FIPS 202, Section 6.2]. + # Function `TurboSHAKE128(M, D, L)` is as defined in + # Section 2.2 of [TurboSHAKE]. # # Implementation note: Rather than re-generate the output # stream each time `next()` is invoked, most implementations - # of SHA-3 will expose an "absorb-then-squeeze" API that + # of TurboSHAKE128 will expose an "absorb-then-squeeze" API that # allows stateful handling of the stream. - dst_length = to_le_bytes(len(self.s), 1) - stream = SHAKE128(dst_length + self.s + self.x, self.l) + stream = TurboSHAKE128(self.m, 1, self.l) return stream[-length:] ~~~ -{: title="Definition of XOF XofShake128."} +{: title="Definition of XOF XofTurboShake128."} ### XofFixedKeyAes128 {#xof-fixed-key-aes128} -While XofShake128 as described above can be securely used in all cases where a XOF -is needed in the VDAFs described in this document, there are some cases where -a more efficient instantiation based on fixed-key AES is possible. 
For now, this -is limited to the XOF used inside the Idpf {{idpf}} implementation in Poplar1 -{{idpf-poplar}}. It is NOT RECOMMENDED to use this XOF anywhere else. -The length of the domain separation string `dst` passed to XofFixedKeyAes128 -MUST NOT exceed 255 bytes. See Security Considerations {{security}} for a more -detailed discussion. +While XofTurboShake128 as described above can be securely used in all cases +where a XOF is needed in the VDAFs described in this document, there are some +cases where a more efficient instantiation based on fixed-key AES is possible. +For now, this is limited to the XOF used inside the Idpf {{idpf}} +implementation in Poplar1 {{idpf-poplar}}. It is NOT RECOMMENDED to use this +XOF anywhere else. The length of the domain separation string `dst` passed to +XofFixedKeyAes128 MUST NOT exceed 255 bytes. See Security Considerations +{{security}} for a more detailed discussion. ~~~ class XofFixedKeyAes128(Xof): @@ -1890,15 +1882,15 @@ class XofFixedKeyAes128(Xof): def __init__(self, seed, dst, binder): self.length_consumed = 0 - # Use SHA-3 to derive a key from the binder string and domain - # separation tag. Note that the AES key does not need to be - # kept secret from any party. However, when used with + # Use TurboSHAKE128 to derive a key from the binder string and + # domain separation tag. Note that the AES key does not need + # to be kept secret from any party. However, when used with # IdpfPoplar, we require the binder to be a random nonce. # # Implementation note: This step can be cached across XOF # evaluations with many different seeds. dst_length = to_le_bytes(len(dst), 1) - self.fixed_key = SHAKE128(dst_length + dst + binder, 16) + self.fixed_key = TurboSHAKE128(dst_length + dst + binder, 2, 16) self.seed = seed def next(self, length: Unsigned) -> Bytes: @@ -3178,9 +3170,9 @@ each can be found in {{test-vectors}}. 
Our first instance of Prio3 is for a simple counter: Each measurement is either one or zero and the aggregate result is the sum of the measurements. -This instance uses XofShake128 ({{xof-shake128}}) as its XOF. Its validity -circuit, denoted `Count`, uses `Field64` ({{fields}}) as its finite field. Its -gadget, denoted `Mul`, is the degree-2, arity-2 gadget defined as +This instance uses XofTurboShake128 ({{xof-turboshake128}}) as its XOF. Its +validity circuit, denoted `Count`, uses `Field64` ({{fields}}) as its finite +field. Its gadget, denoted `Mul`, is the degree-2, arity-2 gadget defined as ~~~ def eval(self, Field, inp): @@ -3220,10 +3212,11 @@ The next instance of Prio3 supports summing of integers in a pre-determined range. Each measurement is an integer in range `[0, 2^bits)`, where `bits` is an associated parameter. -This instance of Prio3 uses XofShake128 ({{xof-shake128}}) as its XOF. Its validity -circuit, denoted `Sum`, uses `Field128` ({{fields}}) as its finite field. The -measurement is encoded as a length-`bits` vector of field elements, where the -`l`th element of the vector represents the `l`th bit of the summand: +This instance of Prio3 uses XofTurboShake128 ({{xof-turboshake128}}) as its +XOF. Its validity circuit, denoted `Sum`, uses `Field128` ({{fields}}) as its +finite field. The measurement is encoded as a length-`bits` vector of field +elements, where the `l`th element of the vector represents the `l`th bit of the +summand: ~~~ def encode(self, measurement): @@ -3283,8 +3276,9 @@ of the measurement is an integer in the range `[0, 2^bits)`. It is RECOMMENDED to set `chunk_length` to an integer near the square root of `length * bits` (see {{parallel-sum-chunk-length}}). -This instance uses XofShake128 ({{xof-shake128}}) as its XOF. Its validity circuit, -denoted `SumVec`, uses `Field128` ({{fields}}) as its finite field. +This instance uses XofTurboShake128 ({{xof-turboshake128}}) as its XOF. 
Its +validity circuit, denoted `SumVec`, uses `Field128` ({{fields}}) as its finite +field. Measurements are encoded as a vector of field elements with length `length * bits`. The field elements in the encoded vector represent all the bits of the @@ -3417,12 +3411,12 @@ example, the buckets might quantize the real numbers, and each measurement would report the bucket that the corresponding client's real-numbered value falls into. The aggregate result counts the number of measurements in each bucket. -This instance of Prio3 uses XofShake128 ({{xof-shake128}}) as its XOF. Its validity -circuit, denoted `Histogram`, uses `Field128` ({{fields}}) as its finite field. -It has two parameters, `length`, the number of histogram buckets, and -`chunk_length`, which is used by by a circuit optimization described below. It -is RECOMMENDED to set `chunk_length` to an integer near the square root of -`length` (see {{parallel-sum-chunk-length}}). +This instance of Prio3 uses XofTurboShake128 ({{xof-turboshake128}}) as its +XOF. Its validity circuit, denoted `Histogram`, uses `Field128` ({{fields}}) as +its finite field. It has two parameters, `length`, the number of histogram +buckets, and `chunk_length`, which is used by a circuit optimization +described below. It is RECOMMENDED to set `chunk_length` to an integer near the +square root of `length` (see {{parallel-sum-chunk-length}}). The measurement is encoded as a one-hot vector representing the bucket into which the measurement falls: @@ -4438,8 +4432,8 @@ throws an error. ## Instantiation {#poplar1-inst} By default, Poplar1 is instantiated with IdpfPoplar (`VALUE_LEN == 2`) and -XofShake128 ({{xof-shake128}}). This VDAF is suitable for any positive value of -`BITS`. Test vectors can be found in {{test-vectors}}. +XofTurboShake128 ({{xof-turboshake128}}). This VDAF is suitable for any +positive value of `BITS`. Test vectors can be found in {{test-vectors}}. 
# Security Considerations {#security} @@ -4591,7 +4585,7 @@ differential privacy. As described in {{xof}}, our constructions rely on eXtendable Output Functions (XOFs). In the security analyses of our protocols, these are -usually modeled as random oracles. XofShake128 is designed to be +usually modeled as random oracles. XofTurboShake128 is designed to be indifferentiable from a random oracle {{MRH04}}, making it a suitable choice for most situations. diff --git a/poc/daf.py b/poc/daf.py index 24047121..1de96325 100644 --- a/poc/daf.py +++ b/poc/daf.py @@ -6,7 +6,7 @@ import field from common import Bool, Unsigned, gen_rand -from xof import XofShake128 +from xof import XofTurboShake128 class Daf: @@ -166,11 +166,11 @@ class TestDaf(Daf): @classmethod def shard(cls, measurement, _nonce, rand): - helper_shares = XofShake128.expand_into_vec(cls.Field, - rand, - b'', - b'', - cls.SHARES-1) + helper_shares = XofTurboShake128.expand_into_vec(cls.Field, + rand, + b'', + b'', + cls.SHARES-1) leader_share = cls.Field(measurement) for helper_share in helper_shares: leader_share -= helper_share diff --git a/poc/draft-irtf-cfrg-kangarootwelve b/poc/draft-irtf-cfrg-kangarootwelve new file mode 160000 index 00000000..11e7bc60 --- /dev/null +++ b/poc/draft-irtf-cfrg-kangarootwelve @@ -0,0 +1 @@ +Subproject commit 11e7bc6052d1ccdc87994cd9905b27535401098b diff --git a/poc/vdaf_poplar1.py b/poc/vdaf_poplar1.py index 16bbd9f5..d8b3535c 100644 --- a/poc/vdaf_poplar1.py +++ b/poc/vdaf_poplar1.py @@ -328,7 +328,7 @@ def with_bits(Poplar1, bits: Unsigned): TheIdpf = idpf_poplar.IdpfPoplar \ .with_value_len(2) \ .with_bits(bits) - TheXof = xof.XofShake128 + TheXof = xof.XofTurboShake128 class Poplar1WithBits(Poplar1): Idpf = TheIdpf @@ -414,28 +414,28 @@ def encode_idpf_field_vec(vec): [2], ) test_vdaf( - Poplar1.with_bits(128), + Poplar1.with_bits(64), ( - 127, - (from_be_bytes(b'0123456789abcdef'),), + 63, + (from_be_bytes(b'01234567'),), ), [ - from_be_bytes(b'0123456789abcdef'), + 
from_be_bytes(b'01234567'), ], [1], ) test_vdaf( - Poplar1.with_bits(256), + Poplar1.with_bits(64), ( - 63, + 31, ( - from_be_bytes(b'00000000'), - from_be_bytes(b'01234567'), + from_be_bytes(b'0000'), + from_be_bytes(b'0123'), ), ), [ - from_be_bytes(b'0123456789abcdef0123456789abcdef'), - from_be_bytes(b'01234567890000000000000000000000'), + from_be_bytes(b'01234567'), + from_be_bytes(b'01234000'), ], [0, 2], ) diff --git a/poc/vdaf_prio3.py b/poc/vdaf_prio3.py index fd6628cf..cab7bc9e 100644 --- a/poc/vdaf_prio3.py +++ b/poc/vdaf_prio3.py @@ -443,12 +443,12 @@ def test_vec_encode_prep_msg(Prio3, k_joint_rand): class Prio3Count(Prio3): # Generic types required by `Prio3` - Xof = xof.XofShake128 + Xof = xof.XofTurboShake128 Flp = flp_generic.FlpGeneric(flp_generic.Count()) # Associated parameters. ID = 0x00000000 - VERIFY_KEY_SIZE = xof.XofShake128.SEED_SIZE + VERIFY_KEY_SIZE = xof.XofTurboShake128.SEED_SIZE # Operational parameters. test_vec_name = 'Prio3Count' @@ -456,10 +456,10 @@ class Prio3Count(Prio3): class Prio3Sum(Prio3): # Generic types required by `Prio3` - Xof = xof.XofShake128 + Xof = xof.XofTurboShake128 # Associated parameters. - VERIFY_KEY_SIZE = xof.XofShake128.SEED_SIZE + VERIFY_KEY_SIZE = xof.XofTurboShake128.SEED_SIZE ID = 0x00000001 # Operational parameters. @@ -474,10 +474,10 @@ class Prio3SumWithBits(Prio3Sum): class Prio3SumVec(Prio3): # Generic types required by `Prio3` - Xof = xof.XofShake128 + Xof = xof.XofTurboShake128 # Associated parameters. - VERIFY_KEY_SIZE = xof.XofShake128.SEED_SIZE + VERIFY_KEY_SIZE = xof.XofTurboShake128.SEED_SIZE ID = 0x00000002 # Operational parameters. @@ -495,10 +495,10 @@ class Prio3SumVecWithParams(Prio3SumVec): class Prio3Histogram(Prio3): # Generic types required by `Prio3` - Xof = xof.XofShake128 + Xof = xof.XofTurboShake128 # Associated parameters. - VERIFY_KEY_SIZE = xof.XofShake128.SEED_SIZE + VERIFY_KEY_SIZE = xof.XofTurboShake128.SEED_SIZE ID = 0x00000003 # Operational parameters. 
@@ -552,10 +552,10 @@ class Prio3SumVecWithMultiproofAndParams(cls): class Prio3MultiHotHistogram(Prio3): # Generic types required by `Prio3` - Xof = xof.XofShake128 + Xof = xof.XofTurboShake128 # Associated parameters. - VERIFY_KEY_SIZE = xof.XofShake128.SEED_SIZE + VERIFY_KEY_SIZE = xof.XofTurboShake128.SEED_SIZE # Private codepoint just for testing. ID = 0xFFFFFFFF @@ -584,11 +584,11 @@ class TestPrio3Average(Prio3): class's decode() method. """ - Xof = xof.XofShake128 + Xof = xof.XofTurboShake128 # NOTE 0xFFFFFFFF is reserved for testing. If we decide to standardize this # Prio3 variant, then we'll need to pick a real codepoint for it. ID = 0xFFFFFFFF - VERIFY_KEY_SIZE = xof.XofShake128.SEED_SIZE + VERIFY_KEY_SIZE = xof.XofTurboShake128.SEED_SIZE @classmethod def with_bits(cls, bits: Unsigned): @@ -650,7 +650,7 @@ def test_prio3sumvec_with_multiproof(): num_shares = 2 # Must be in range `[2, 256)` cls = Prio3 \ - .with_xof(xof.XofShake128) \ + .with_xof(xof.XofTurboShake128) \ .with_flp(flp.FlpTestField128()) \ .with_shares(num_shares) cls.ID = 0xFFFFFFFF @@ -659,7 +659,7 @@ def test_prio3sumvec_with_multiproof(): # If JOINT_RAND_LEN == 0, then Fiat-Shamir isn't needed and we can skip # generating the joint randomness. cls = Prio3 \ - .with_xof(xof.XofShake128) \ + .with_xof(xof.XofTurboShake128) \ .with_flp(flp.FlpTestField128.with_joint_rand_len(0)) \ .with_shares(num_shares) cls.ID = 0xFFFFFFFF diff --git a/poc/xof.py b/poc/xof.py index 0adb4d15..e1ae5ee4 100644 --- a/poc/xof.py +++ b/poc/xof.py @@ -2,13 +2,20 @@ from __future__ import annotations +import sys +sys.path.append('draft-irtf-cfrg-kangarootwelve/py') + from Cryptodome.Cipher import AES -from Cryptodome.Hash import SHAKE128 +from TurboSHAKE import TurboSHAKE128 from common import (TEST_VECTOR, VERSION, Bytes, Unsigned, concat, format_dst, from_le_bytes, gen_rand, next_power_of_2, print_wrapped_line, to_le_bytes, xor) +# Maximum XOF output length that will be requested by any test in this package. 
+# Each time `XofTurboShake128` is constructed we call `TurboSHAKE128()` once +# and fill a buffer with the output stream. +MAX_XOF_OUT_STREAM_BYTES = 2000 class Xof: """The base class for XOFs.""" @@ -61,27 +68,28 @@ def expand_into_vec(Xof, return xof.next_vec(Field, length) -class XofShake128(Xof): +class XofTurboShake128(Xof): """XOF based on SHA-3 (SHAKE128).""" # Associated parameters SEED_SIZE = 16 # Operational parameters. - test_vec_name = 'XofShake128' + test_vec_name = 'XofTurboShake128' def __init__(self, seed, dst, binder): - # The input is composed of `dst`, the domain separation tag, the - # `seed`, and the `binder` string. - self.shake = SHAKE128.new() - dst_length = to_le_bytes(len(dst), 1) - self.shake.update(dst_length) - self.shake.update(dst) - self.shake.update(seed) - self.shake.update(binder) + self.length_consumed = 0 + self.stream = TurboSHAKE128( + to_le_bytes(len(dst), 1) + dst + seed + binder, + 1, + MAX_XOF_OUT_STREAM_BYTES, + ) - def next(self, length: Unsigned) -> Bytes: - return self.shake.read(length) + def next(self, length): + assert self.length_consumed + length <= MAX_XOF_OUT_STREAM_BYTES + out = self.stream[self.length_consumed:self.length_consumed+length] + self.length_consumed += length + return out class XofFixedKeyAes128(Xof): @@ -106,12 +114,8 @@ def __init__(self, seed, dst, binder): # # Implementation note: This step can be cached across XOF # evaluations with many different seeds. - shake = SHAKE128.new() - dst_length = to_le_bytes(len(dst), 1) - shake.update(dst_length) - shake.update(dst) - shake.update(binder) - fixed_key = shake.read(16) + fixed_key = TurboSHAKE128( + to_le_bytes(len(dst), 1) + dst + binder, 2, 16) self.cipher = AES.new(fixed_key, AES.MODE_ECB) # Save seed to be used in `next`. 
self.seed = seed @@ -182,17 +186,17 @@ def test_xof(Xof, F, expanded_len): # This test case was found through brute-force search using this tool: # https://github.com/divergentdave/vdaf-rejection-sampling-search - expanded_vec = XofShake128.expand_into_vec( + expanded_vec = XofTurboShake128.expand_into_vec( Field64, - bytes([0x29, 0xb2, 0x98, 0x64, 0xb4, 0xaa, 0x4e, 0x07, 0x2a, 0x44, - 0x49, 0x24, 0xf6, 0x74, 0x0a, 0x3d]), + bytes([0xd1, 0x95, 0xec, 0x90, 0xc1, 0xbc, 0xf1, 0xf2, 0xcb, 0x2c, + 0x7e, 0x74, 0xc5, 0xc5, 0xf6, 0xda]), b'', # domain separation tag b'', # binder - 33237, + 140, ) - assert expanded_vec[-1] == Field64(2035552711764301796) + assert expanded_vec[-1] == Field64(9734340616212735019) - for cls in (XofShake128, XofFixedKeyAes128): + for cls in (XofTurboShake128, XofFixedKeyAes128): test_xof(cls, Field128, 23) if TEST_VECTOR: