From 74bb51c642a2668effbfaeb8b0256dc7ac02ccde Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=98yvind=20R=C3=B8nningstad?=
 <oyvind.ronningstad@nordicsemi.no>
Date: Thu, 25 Apr 2024 11:58:22 +0000
Subject: [PATCH] zcbor_encode: Add new fragmented string encoding API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

to match the redesigned decoding API.

Signed-off-by: Øyvind Rønningstad <oyvind.ronningstad@nordicsemi.no>
---
 README.md                              |  25 ++++
 include/zcbor_encode.h                 |  40 ++++++
 src/zcbor_encode.c                     | 117 ++++++++++++++++-
 tests/unit/test1_unit_tests/src/main.c | 171 +++++++++++++++++++++++++
 4 files changed, 351 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 212c2d19..d4e4c8f6 100644
--- a/README.md
+++ b/README.md
@@ -88,6 +88,31 @@ ZCBOR_STATE_D(decode_state, n, payload, payload_len, elem_count, n_flags);
 ZCBOR_STATE_E(encode_state, n, payload, payload_len, 0);
 ```
 
+Fragmented payloads
+-------------------
+
+zcbor can encode and decode payloads in sections.
+This can be useful e.g. if you send or receive your payload in multiple packets.
+When the current payload section is done, call `zcbor_update_state()` to introduce the next section.
+Note that zcbor does not allow section boundaries to fall inside a zcbor header/value pair.
+This means that the following elements cannot be split between sections:
+
+- Numbers and simple values (integers, floats, bools, undefined, nil)
+- Tags
+- Headers of lists, maps, tstrs, and bstrs
+
+If your payload is split in an unsupported way, you can get around it by making a small section out of the remaining bytes of one section spliced with the start of the next.
+Another option is to leave a little room at the start of each section buffer, and copy the remaining end of one section into the start of the next buffer.
+8 bytes should be enough for this.
+
+Lists and maps can span multiple sections, as long as no individual element is split in a way that breaks the above rule.
+
+String payloads can be split across multiple payload sections, if `ZCBOR_FRAGMENTS` is enabled, and the `*str_fragments_*()` APIs are used. Note that in the zcbor docs, the term "string fragment" is used for fragmented strings, while the term "payload section" is used for fragmented CBOR payloads, as passed to `zcbor_update_state()`. These do not always line up perfectly, particularly at the start and end of fragmented strings.
+
+CBOR-encoded bstrs can be nested, and there can also be a non-CBOR-encoded innermost string.
+The current innermost string is called the "current string".
+`zcbor_update_state()` modifies all backups so that outer nested strings have updated information about the new section.
+
 Configuration
 -------------
 
diff --git a/include/zcbor_encode.h b/include/zcbor_encode.h
index 9bd9383c..326c47ba 100644
--- a/include/zcbor_encode.h
+++ b/include/zcbor_encode.h
@@ -231,6 +231,46 @@ bool zcbor_bstr_start_encode(zcbor_state_t *state);
  */
 bool zcbor_bstr_end_encode(zcbor_state_t *state, struct zcbor_string *result);
 
+
+#ifdef ZCBOR_FRAGMENTS
+
+/** Start encoding a fragmented string. I.e. a string spread over non-consecutive payload sections.
+ *
+ * After calling this, you can write a fragment with @ref zcbor_str_fragment_encode,
+ * then update the payload with @ref zcbor_update_state.
+ * Repeat until the string is fully encoded, then call @ref zcbor_str_fragments_end_encode.
+ */
+bool zcbor_bstr_fragments_start_encode(zcbor_state_t *state, size_t total_len);
+bool zcbor_tstr_fragments_start_encode(zcbor_state_t *state, size_t total_len);
+
+/** Start encoding a fragmented CBOR-encoded bytestring.
+ *
+ * I.e. a string spread over non-consecutive payload sections.
+ *
+ * This is an alternative to zcbor_*str_fragments_start_encode() to be used if the payload
+ * contains CBOR data that will be encoded directly with other zcbor_*() functions.
+ *
+ * A state backup is created to keep track of the element count and original payload_end.
+ * After calling this, you can encode elements using other zcbor functions,
+ * then update the payload with @ref zcbor_update_state.
+ * Repeat until the string is fully encoded, then call @ref zcbor_str_fragments_end_encode.
+ * When the current payload section contains the end of the string,
+ * payload_end is set to the end of the string, so there is no risk of encoding past the end.
+ */
+bool zcbor_cbor_bstr_fragments_start_encode(zcbor_state_t *state, size_t total_len);
+
+/** Encode a string fragment.
+ *
+ * Copy bytes from @p fragment into the payload until the end of the payload, the fragment, or the string is reached, and store the number of bytes written in @p enc_len (if not NULL).
+ * Do not use this function with @ref zcbor_cbor_bstr_fragments_start_encode.
+ */
+bool zcbor_str_fragment_encode(zcbor_state_t *state, struct zcbor_string *fragment, size_t *enc_len);
+
+/** Finish encoding a fragmented string. */
+bool zcbor_str_fragments_end_encode(zcbor_state_t *state);
+
+#endif /* ZCBOR_FRAGMENTS */
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/zcbor_encode.c b/src/zcbor_encode.c
index 584dad97..7ec8de5d 100644
--- a/src/zcbor_encode.c
+++ b/src/zcbor_encode.c
@@ -48,6 +48,10 @@ static bool encode_header_byte(zcbor_state_t *state,
 
 	zcbor_assert_state(additional < 32, "Unsupported additional value: %d\r\n", additional);
 
+#ifdef ZCBOR_FRAGMENTS
+	ZCBOR_ERR_IF(state->inside_frag_str, ZCBOR_ERR_INSIDE_STRING);
+#endif
+
 	*(state->payload_mut) = (uint8_t)((major_type << 5) | (additional & 0x1F));
 	zcbor_trace(state, "value_encode");
 	state->payload_mut++;
@@ -243,6 +247,7 @@ bool zcbor_bstr_start_encode(zcbor_state_t *state)
 
 	/* Encode a dummy header */
 	if (!value_encode(state, ZCBOR_MAJOR_TYPE_BSTR, &max_len, sizeof(max_len))) {
+		zcbor_process_backup(state, ZCBOR_FLAG_CONSUME, 0xFFFFFFFF);
 		ZCBOR_FAIL();
 	}
 	return true;
@@ -268,7 +273,7 @@ bool zcbor_bstr_end_encode(zcbor_state_t *state, struct zcbor_string *result)
 	result->value = state->payload_end - remaining_str_len(state);
 	result->len = (size_t)payload - (size_t)result->value;
 
-	/* Reencode header of list now that we know the number of elements. */
+	/* Reencode header of bstr now that we know the length. */
 	if (!zcbor_bstr_encode(state, result)) {
 		ZCBOR_FAIL();
 	}
@@ -288,7 +293,7 @@ static bool str_encode(zcbor_state_t *state,
 	}
 	if (state->payload_mut != input->value) {
 		/* Use memmove since string might be encoded into the same space
-		 * because of bstrx_cbor_start_encode/bstrx_cbor_end_encode. */
+		 * because of zcbor_bstr_start_encode/zcbor_bstr_end_encode. */
 		memmove(state->payload_mut, input->value, input->len);
 	}
 	state->payload += input->len;
@@ -336,6 +341,106 @@ bool zcbor_tstr_put_term(zcbor_state_t *state, char const *str, size_t maxlen)
 }
 
 
+#ifdef ZCBOR_FRAGMENTS
+/* Encode the header of a fragmented string of 'len' bytes and mark the state as being inside it; 'cbor_bstr' selects the CBOR-encoded variant. */
+static bool start_encode_fragments(zcbor_state_t *state,
+	zcbor_major_type_t major_type, size_t len, bool cbor_bstr)
+{
+	ZCBOR_CHECK_PAYLOAD();
+	/* The new string and its header must fit in the rest of the enclosing CBOR bstr; checked stepwise so the subtraction cannot wrap. */
+	if (state->inside_cbor_bstr) {
+		size_t remaining = state->str_total_len_cbor - zcbor_current_string_offset(state);
+		size_t header_len = zcbor_header_len(len);
+		ZCBOR_ERR_IF((remaining < header_len) || ((remaining - header_len) < len), ZCBOR_ERR_INNER_STRING_TOO_LARGE);
+	}
+	/* Only the CBOR-encoded variant takes a state backup; it is restored and consumed below if the header cannot be encoded. */
+	if (cbor_bstr) {
+		if (!zcbor_new_backup(state, 0)) {
+			ZCBOR_FAIL();
+		}
+	}
+
+	if (!value_encode(state, major_type, &len, sizeof(len))) {
+		if (cbor_bstr) {
+			zcbor_process_backup(state, ZCBOR_FLAG_CONSUME | ZCBOR_FLAG_RESTORE, 0xFFFFFFFF);
+		}
+		ZCBOR_FAIL();
+	}
+	/* Record where the current payload section starts relative to the payload position after the header. */
+	ptrdiff_t new_offset = state->constant_state->curr_payload_section - state->payload;
+
+	if (cbor_bstr) {
+		state->frag_offset_cbor = new_offset;
+		state->str_total_len_cbor = len;
+		state->inside_cbor_bstr = true;
+	} else {
+		state->frag_offset = new_offset;
+		state->str_total_len = len;
+		state->inside_frag_str = true;
+	}
+
+	return true;
+}
+
+/* Start a fragmented bstr of 'len' bytes: forwards to start_encode_fragments() with the BSTR major type and cbor_bstr = false. */
+bool zcbor_bstr_fragments_start_encode(zcbor_state_t *state, size_t len)
+{
+	return start_encode_fragments(state, ZCBOR_MAJOR_TYPE_BSTR, len, false);
+}
+
+/* Start a fragmented tstr of 'len' bytes: forwards to start_encode_fragments() with the TSTR major type and cbor_bstr = false. */
+bool zcbor_tstr_fragments_start_encode(zcbor_state_t *state, size_t len)
+{
+	return start_encode_fragments(state, ZCBOR_MAJOR_TYPE_TSTR, len, false);
+}
+
+/* Start a fragmented CBOR-encoded bstr of 'len' bytes: forwards to start_encode_fragments() with the BSTR major type and cbor_bstr = true. */
+bool zcbor_cbor_bstr_fragments_start_encode(zcbor_state_t *state, size_t len)
+{
+	return start_encode_fragments(state, ZCBOR_MAJOR_TYPE_BSTR, len, true);
+}
+
+/* Copy as much of 'fragment' as currently fits into the payload; the number of bytes copied is written to 'enc_len' when it is non-NULL. */
+bool zcbor_str_fragment_encode(zcbor_state_t *state, struct zcbor_string *fragment, size_t *enc_len)
+{
+	ZCBOR_CHECK_PAYLOAD();
+
+	ZCBOR_ERR_IF(!state->inside_frag_str, ZCBOR_ERR_NOT_IN_FRAGMENT);
+	/* Copy the smallest of: room left in the payload section, the fragment length, and str_total_len minus the current string offset. */
+	size_t len  = MIN(MIN((size_t)state->payload_end - (size_t)state->payload, fragment->len),
+				state->str_total_len - zcbor_current_string_offset(state));
+
+	memcpy(state->payload_mut, fragment->value, len);
+	state->payload += len;
+
+	if (enc_len != NULL) {
+		*enc_len = len;
+	}
+
+	return true;
+}
+
+/* Finish the current fragmented string; fails unless the state is inside one and zcbor_current_string_remainder() reports 0. */
+bool zcbor_str_fragments_end_encode(zcbor_state_t *state)
+{
+	ZCBOR_ERR_IF(!state->inside_frag_str && !state->inside_cbor_bstr, ZCBOR_ERR_NOT_IN_FRAGMENT);
+	ZCBOR_ERR_IF(zcbor_current_string_remainder(state) != 0, ZCBOR_ERR_NOT_AT_END);
+	/* A plain fragmented string only clears its flag; a CBOR bstr processes its backup (with the keep-payload flag) and is counted as one element. */
+	if (state->inside_frag_str) {
+		state->inside_frag_str = false;
+	} else {
+		if (!zcbor_process_backup(state, ZCBOR_FLAG_RESTORE | ZCBOR_FLAG_CONSUME | ZCBOR_FLAG_KEEP_PAYLOAD, 0xFFFFFFFF)) {
+			ZCBOR_FAIL();
+		}
+		state->elem_count++;
+	}
+
+	return true;
+}
+
+#endif /* ZCBOR_FRAGMENTS */
+
+
 static bool list_map_start_encode(zcbor_state_t *state, size_t max_num,
 		zcbor_major_type_t major_type)
 {
@@ -385,6 +490,14 @@ static bool list_map_end_encode(zcbor_state_t *state, size_t max_num,
 	size_t max_header_len = zcbor_header_len_ptr(&max_num, 4) - 1;
 	size_t header_len = zcbor_header_len_ptr(&list_count, 4) - 1;
 
+	if (max_num == list_count) {
+		if (!zcbor_process_backup(state, ZCBOR_FLAG_RESTORE | ZCBOR_FLAG_CONSUME | ZCBOR_FLAG_KEEP_PAYLOAD, 0xFFFFFFFF)) {
+			ZCBOR_FAIL();
+		}
+		state->elem_count++;
+		return true;
+	}
+
 	if (!zcbor_process_backup(state, ZCBOR_FLAG_RESTORE | ZCBOR_FLAG_CONSUME, 0xFFFFFFFF)) {
 		ZCBOR_FAIL();
 	}
diff --git a/tests/unit/test1_unit_tests/src/main.c b/tests/unit/test1_unit_tests/src/main.c
index d94f4637..e69ab160 100644
--- a/tests/unit/test1_unit_tests/src/main.c
+++ b/tests/unit/test1_unit_tests/src/main.c
@@ -641,6 +641,177 @@ ZTEST(zcbor_unit_tests, test_bstr_cbor_fragments)
 	zassert_mem_equal(output.value, &payload[4], 11, NULL);
 }
 
+#define zassert_error(err, state) zassert_equal(err, zcbor_peek_error(state), #err " != %s\n", zcbor_error_str(zcbor_peek_error(state))) /* Assert that zcbor_peek_error(state) equals err. */
+
+/* Encode nested fragmented strings across four payload buffers, decode the joined result contiguously, then decode it section by section. */
+ZTEST(zcbor_unit_tests, test_nested_fragments)
+{
+	uint8_t lorem[] = "Lorem ipsum dolor sit amet";
+	struct zcbor_string lorem_str = {.value = lorem, .len = sizeof(lorem) - 1};
+	struct zcbor_string lorem_str_exp = {.value = lorem, .len = sizeof(lorem) - 1};
+	struct zcbor_string_fragment output_frags[3];
+	uint8_t output_string[30];
+	size_t output_str_len = sizeof(output_string);
+	struct zcbor_string res_str;
+	size_t enc_len;
+	uint8_t payload_frag1[4];
+	int dummy_sep1; // To separate payload fragments
+	uint8_t payload_frag2[18];
+	int dummy_sep2; // To separate payload fragments
+	uint8_t payload_frag3[10];
+	int dummy_sep3; // To separate payload fragments
+	uint8_t payload_frag4[25];
+
+	uint8_t payload1[100];
+
+	(void)dummy_sep1;
+	(void)dummy_sep2;
+	(void)dummy_sep3;
+
+	ZCBOR_STATE_E(state_e, 4, payload_frag1, sizeof(payload_frag1), 0);
+
+	ZCBOR_STATE_D(state_d, 4, payload_frag1, sizeof(payload_frag1) - 1, 1, 0);
+	ZCBOR_STATE_D(state_d2, 4, payload1, sizeof(payload1), 1, 0);
+
+	/* Start encode tests, negative tests are indented. */
+
+	/* payload_frag1 */
+	zassert_true(zcbor_list_start_encode(state_e, 2));
+		zassert_false(zcbor_str_fragments_end_encode(state_e));
+		zassert_error(ZCBOR_ERR_NOT_IN_FRAGMENT, state_e);
+	zassert_true(zcbor_uint32_put(state_e, 42));
+		zassert_false(zcbor_cbor_bstr_fragments_start_encode(state_e, 38));
+		zassert_error(ZCBOR_ERR_NO_PAYLOAD, state_e);
+	zcbor_update_state(state_e, payload_frag2, sizeof(payload_frag2)); /* Abandon 1 byte of the fragment. */
+
+#ifdef ZCBOR_CANONICAL
+	#define LEN_OFFS 0
+#else
+	#define LEN_OFFS 1
+#endif
+
+	/* payload_frag2 */
+	zassert_true(zcbor_cbor_bstr_fragments_start_encode(state_e, 37 + LEN_OFFS));
+	zassert_true(zcbor_uint32_put(state_e, 43));
+	zassert_true(zcbor_list_start_encode(state_e, 2));
+	zassert_true(zcbor_uint32_put(state_e, 44));
+		zassert_false(zcbor_cbor_bstr_fragments_start_encode(state_e, lorem_str.len + 5 + LEN_OFFS));
+		zassert_error(ZCBOR_ERR_INNER_STRING_TOO_LARGE, state_e);
+	zassert_true(zcbor_cbor_bstr_fragments_start_encode(state_e, lorem_str.len + 4));
+		zassert_false(zcbor_str_fragment_encode(state_e, &lorem_str, &enc_len));
+		zassert_error(ZCBOR_ERR_NOT_IN_FRAGMENT, state_e);
+	zassert_true(zcbor_uint32_put(state_e, 45));
+		zassert_false(zcbor_tstr_fragments_start_encode(state_e, lorem_str.len + 1));
+		zassert_error(ZCBOR_ERR_INNER_STRING_TOO_LARGE, state_e);
+	bool ret = zcbor_tstr_fragments_start_encode(state_e, lorem_str.len);
+	zassert_true(ret, "err %s\n", zcbor_error_str(zcbor_peek_error(state_e)));
+		zassert_false(zcbor_uint32_put(state_e, 46));
+		zassert_error(ZCBOR_ERR_INSIDE_STRING, state_e);
+		zassert_false(zcbor_tstr_fragments_start_encode(state_e, 1));
+		zassert_error(ZCBOR_ERR_INSIDE_STRING, state_e);
+	zassert_true(zcbor_str_fragment_encode(state_e, &lorem_str, &enc_len));
+	zassert_equal(sizeof(payload_frag2) - 13, enc_len);
+		zassert_false(zcbor_str_fragment_encode(state_e, &lorem_str, NULL));
+		zassert_error(ZCBOR_ERR_NO_PAYLOAD, state_e);
+	zcbor_update_state(state_e, payload_frag3, sizeof(payload_frag3));
+
+	/* payload_frag3 */
+	lorem_str.value += enc_len;
+	lorem_str.len -= enc_len;
+	zassert_true(zcbor_str_fragment_encode(state_e, &lorem_str, &enc_len));
+	zassert_equal(sizeof(payload_frag3), enc_len);
+	zcbor_update_state(state_e, payload_frag4, sizeof(payload_frag4));
+
+	/* payload_frag4 */
+	lorem_str.value += enc_len;
+	lorem_str.len -= enc_len;
+	zassert_true(zcbor_str_fragment_encode(state_e, &lorem_str, &enc_len));
+	zassert_equal(lorem_str.len, enc_len, "%zu != %zu\n", lorem_str.len, enc_len);
+	zassert_true(zcbor_str_fragments_end_encode(state_e));
+	zassert_true(zcbor_str_fragments_end_encode(state_e));
+	ret = zcbor_list_end_encode(state_e, 2);
+	zassert_true(ret, "err %s\n", zcbor_error_str(zcbor_peek_error(state_e)));
+
+	ret = zcbor_str_fragments_end_encode(state_e);
+	zassert_true(ret, "err %s\n", zcbor_error_str(zcbor_peek_error(state_e)));
+
+		zassert_false(zcbor_str_fragments_end_encode(state_e));
+		zassert_error(ZCBOR_ERR_NOT_IN_FRAGMENT, state_e);
+	zassert_true(zcbor_list_end_encode(state_e, 2));
+	size_t offs = 0;
+	memcpy(payload1, payload_frag1, sizeof(payload_frag1) - 1);
+	offs += sizeof(payload_frag1) - 1; /* 1 abandoned byte */
+	memcpy(&payload1[offs], payload_frag2, sizeof(payload_frag2));
+	offs += sizeof(payload_frag2);
+	memcpy(&payload1[offs], payload_frag3, sizeof(payload_frag3));
+	offs += sizeof(payload_frag3);
+	memcpy(&payload1[offs], payload_frag4, sizeof(payload_frag4));
+
+	/* Check */
+	zassert_true(zcbor_list_start_decode(state_d2));
+	zassert_true(zcbor_uint32_expect(state_d2, 42));
+	zassert_true(zcbor_bstr_start_decode(state_d2, &res_str));
+	zassert_true(zcbor_uint32_expect(state_d2, 43));
+	zassert_true(zcbor_list_start_decode(state_d2));
+	zassert_true(zcbor_uint32_expect(state_d2, 44));
+	zassert_true(zcbor_bstr_start_decode(state_d2, &res_str));
+	zassert_true(zcbor_uint32_expect(state_d2, 45));
+	zassert_true(zcbor_tstr_expect(state_d2, &lorem_str_exp));
+	zassert_true(zcbor_bstr_end_decode(state_d2));
+	zassert_true(zcbor_list_end_decode(state_d2));
+	zassert_true(zcbor_bstr_end_decode(state_d2));
+	zassert_true(zcbor_list_end_decode(state_d2));
+
+	/* Start decode tests, negative tests are indented. */
+
+	/* payload_frag1 */
+	zassert_true(zcbor_list_start_decode(state_d));
+		zassert_false(zcbor_str_fragments_end_decode(state_d));
+		zassert_error(ZCBOR_ERR_NOT_IN_FRAGMENT, state_d);
+	zassert_true(zcbor_uint32_expect(state_d, 42));
+		zassert_false(zcbor_cbor_bstr_fragments_start_decode(state_d));
+		zassert_error(ZCBOR_ERR_NO_PAYLOAD, state_d);
+	zcbor_update_state(state_d, payload_frag2, sizeof(payload_frag2));
+
+	/* payload_frag2 */
+	zassert_true(zcbor_cbor_bstr_fragments_start_decode(state_d));
+	zassert_true(zcbor_uint32_expect(state_d, 43));
+	zassert_true(zcbor_list_start_decode(state_d));
+		zassert_false(zcbor_cbor_bstr_fragments_start_decode(state_d));
+		zassert_error(ZCBOR_ERR_WRONG_TYPE, state_d);
+	zassert_true(zcbor_uint32_expect(state_d, 44));
+		state_d->payload_mut[1] += 2; /* induce an error */
+		zassert_false(zcbor_cbor_bstr_fragments_start_decode(state_d));
+		zassert_error(ZCBOR_ERR_INNER_STRING_TOO_LARGE, state_d);
+		state_d->payload_mut[1] -= 2;
+	zassert_true(zcbor_cbor_bstr_fragments_start_decode(state_d));
+	zassert_true(zcbor_uint32_expect(state_d, 45));
+	zassert_true(zcbor_tstr_fragments_start_decode(state_d));
+		zassert_false(zcbor_uint32_expect(state_d, 46));
+		zassert_error(ZCBOR_ERR_INSIDE_STRING, state_d);
+		zassert_false(zcbor_tstr_fragments_start_decode(state_d));
+		zassert_error(ZCBOR_ERR_INSIDE_STRING, state_d);
+	zassert_true(zcbor_str_fragment_decode(state_d, &output_frags[0]));
+		zassert_false(zcbor_str_fragment_decode(state_d, &output_frags[1]));
+		zassert_error(ZCBOR_ERR_NO_PAYLOAD, state_d);
+	zcbor_update_state(state_d, payload_frag3, sizeof(payload_frag3));
+
+	/* payload_frag3 */
+	zassert_true(zcbor_str_fragment_decode(state_d, &output_frags[1]));
+	zcbor_update_state(state_d, payload_frag4, sizeof(payload_frag4));
+
+	/* payload_frag4 */
+	zassert_true(zcbor_str_fragment_decode(state_d, &output_frags[2]));
+	zassert_true(zcbor_validate_string_fragments(output_frags, 3));
+	zassert_true(zcbor_splice_string_fragments(output_frags, 3, output_string, &output_str_len));
+	zassert_mem_equal(output_string, lorem, sizeof(lorem) - 1);
+	zassert_true(zcbor_str_fragments_end_decode(state_d));
+	zassert_true(zcbor_str_fragments_end_decode(state_d));
+	zassert_true(zcbor_list_end_decode(state_d), NULL);
+	zassert_true(zcbor_str_fragments_end_decode(state_d));
+}
+
+
 ZTEST(zcbor_unit_tests, test_canonical_list)
 {
 #ifndef ZCBOR_CANONICAL