From acfa611a3e9e0c449b741ddb7dfa5e95891edab6 Mon Sep 17 00:00:00 2001
From: Alfred Klomp
Date: Tue, 9 Jan 2024 21:57:38 +0100
Subject: [PATCH] bin/base64: decode: ignore newlines

By popular demand, ignore newlines in the encoded input. This achieves
bug compatibility with GNU base64.

The algorithm is quite naive and slow because it checks each byte
independently. There is definitely room for improvement.
---
 bin/base64.c | 72 ++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 61 insertions(+), 11 deletions(-)

diff --git a/bin/base64.c b/bin/base64.c
index 6a94074..09e7c54 100644
--- a/bin/base64.c
+++ b/bin/base64.c
@@ -340,7 +340,21 @@ encode (const struct config *config, struct buffer *buf)
 	return true;
 }
 
-static int
+static inline const char *
+find_newline (const char *p, size_t len)
+{
+	// This is very naive and can definitely be improved.
+	while (len-- > 0) {
+		if (*p == '\n') {
+			return p;
+		}
+		p++;
+	}
+
+	return NULL;
+}
+
+static bool
 decode (const struct config *config, struct buffer *buf)
 {
 	size_t nread, nout;
@@ -352,18 +366,54 @@ decode (const struct config *config, struct buffer *buf)
 	// Read encoded data into the buffer. Use the smallest buffer size to
 	// be on the safe side: the decoded output will fit the raw buffer.
 	while ((nread = fread(buf->enc, 1, BUFFER_RAW_SIZE, config->fp)) > 0) {
+		char *s = buf->enc;
+		size_t avail = nread;
+
+		// By popular demand, this utility tries to be bug-compatible
+		// with GNU `base64'. That includes silently ignoring newlines
+		// in the input. Tokenize the input on newline characters.
+		// This is quite slow, and at some point we will want to
+		// vectorize this.
+		while (avail > 0) {
+
+			// Find the next newline character.
+			const char *p = find_newline(s, avail);
+
+			// Find the length of the next chunk. If no newline is
+			// found, use the rest of the buffer.
+			size_t len = p == NULL ? avail : (size_t) (p - s);
+
+			// Ignore empty chunks.
+			if (len == 0) {
+				s++;
+				avail--;
+				continue;
+			}
 
-		// Decode the input into the raw buffer.
-		if (base64_stream_decode(&state, buf->enc, nread,
-		                         buf->raw, &nout) == 0) {
-			fprintf(stderr, "%s: %s: decoding error\n",
-			        config->name, config->file);
-			return false;
-		}
+			// Decode the input into the raw buffer.
+			if (base64_stream_decode(&state, s, len,
+			                         buf->raw, &nout) == 0) {
+				fprintf(stderr, "%s: %s: decoding error\n",
+				        config->name, config->file);
+				return false;
+			}
 
-		// Append the raw data to the output stream.
-		if (write_stdout(config, buf->raw, nout) == false) {
-			return false;
+			// Append the raw data to the output stream.
+			if (write_stdout(config, buf->raw, nout) == false) {
+				return false;
+			}
+
+			// Bail out if the whole string has been consumed.
+			if (p == NULL) {
+				break;
+			}
+
+			// Add the newline to the chunk length.
+			len++;
+
+			// Move the start pointer and length past the chunk.
+			s += len;
+			avail -= len;
 		}
 	}
 