Skip to content

Commit

Permalink
ec2: Support double encoded userdata (#4276)
Browse files Browse the repository at this point in the history
ec2: Support double encoded userdata

The Amazon APIs expect userdata to be base64 encoded when passed as
input to e.g. RunInstances.  A number of tools, including the AWS CLI,
perform this base64 encoding implicitly, but it's common for users to
base64 encode the data prior to passing it to them. This produces two
layers of base64 encoding and effectively causes the EC2 launch to
fail.  This change adds the ability to decode the redundant layer of
encoding.

Fixes amazonlinux/amazon-linux-2023#401

Signed-off-by: Noah Meyerhans <[email protected]>
  • Loading branch information
nmeyerhans authored Aug 14, 2023
1 parent 9cc3b8f commit 6588373
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 48 deletions.
3 changes: 2 additions & 1 deletion cloudinit/sources/DataSourceEc2.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,13 +567,14 @@ def crawl_metadata(self):
else:
exc_cb = exc_cb_ud = skip_cb = None
try:
crawled_metadata["user-data"] = ec2.get_instance_userdata(
raw_userdata = ec2.get_instance_userdata(
api_version,
self.metadata_address,
headers_cb=self._get_headers,
headers_redact=redact,
exception_cb=exc_cb_ud,
)
crawled_metadata["user-data"] = util.maybe_b64decode(raw_userdata)
crawled_metadata["meta-data"] = ec2.get_instance_metadata(
api_version,
self.metadata_address,
Expand Down
2 changes: 1 addition & 1 deletion cloudinit/sources/DataSourceHetzner.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def _get_data(self):
# The fallout is that in the event of b64 encoded user-data,
# /var/lib/cloud-init/cloud-config.txt will not be identical to the
# user-data provided. It will be decoded.
self.userdata_raw = hc_helper.maybe_b64decode(ud)
self.userdata_raw = util.maybe_b64decode(ud)
self.metadata_full = md

# hostname is name provided by user at launch. The API enforces it is
Expand Down
19 changes: 0 additions & 19 deletions cloudinit/sources/helpers/hetzner.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
#
# This file is part of cloud-init. See LICENSE file for license information.

import base64
import binascii

from cloudinit import url_helper, util


Expand All @@ -25,19 +22,3 @@ def read_userdata(url, timeout=2, sec_between=2, retries=30):
if not response.ok():
raise RuntimeError("unable to read userdata at %s" % url)
return response.contents


def maybe_b64decode(data: bytes) -> bytes:
    """Return data with one layer of base64 encoding removed, if present.

    The input is strictly validated as base64; when validation fails the
    input is handed back untouched.

    @param data: data as bytes. TypeError is raised if not bytes.
    @return: the decoded bytes when data is valid base64, else data itself.
    """
    if isinstance(data, bytes):
        try:
            # validate=True rejects any character outside the base64
            # alphabet instead of silently discarding it.
            decoded = base64.b64decode(data, validate=True)
        except binascii.Error:
            # Not valid base64: treat the payload as already decoded.
            decoded = data
        return decoded
    raise TypeError("data is '%s', expected bytes" % type(data))
18 changes: 18 additions & 0 deletions cloudinit/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#
# This file is part of cloud-init. See LICENSE file for license information.

import binascii
import contextlib
import copy as obj_copy
import email
Expand All @@ -32,6 +33,7 @@
import subprocess
import sys
import time
from base64 import b64decode
from collections import deque, namedtuple
from contextlib import suppress
from errno import EACCES, ENOENT
Expand Down Expand Up @@ -137,6 +139,22 @@ def encode_text(text, encoding="utf-8"):
return text.encode(encoding)


def maybe_b64decode(data: bytes) -> bytes:
    """Return data with one layer of base64 encoding removed, if present.

    The input is strictly validated as base64; when validation fails the
    input is handed back untouched.

    @param data: data as bytes. TypeError is raised if not bytes.
    @return: the decoded bytes when data is valid base64, else data itself.
    """
    if isinstance(data, bytes):
        try:
            # validate=True rejects any character outside the base64
            # alphabet instead of silently discarding it.
            decoded = b64decode(data, validate=True)
        except binascii.Error:
            # Not valid base64: treat the payload as already decoded.
            decoded = data
        return decoded
    raise TypeError("data is '%s', expected bytes" % type(data))


def fully_decoded_payload(part):
# In Python 3, decoding the payload will ironically hand us a bytes object.
# 'decode' means to decode according to Content-Transfer-Encoding, not
Expand Down
27 changes: 0 additions & 27 deletions tests/unittests/sources/test_hetzner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@
#
# This file is part of cloud-init. See LICENSE file for license information.

import base64

import pytest

import cloudinit.sources.helpers.hetzner as hc_helper
from cloudinit import helpers, settings, util
from cloudinit.sources import DataSourceHetzner
from tests.unittests.helpers import CiTestCase, mock
Expand Down Expand Up @@ -143,25 +138,3 @@ def test_not_on_hetzner_returns_false(
# These are a white box attempt to ensure it did not search.
m_find_fallback.assert_not_called()
m_read_md.assert_not_called()


class TestMaybeB64Decode:
    """Exercise the maybe_b64decode helper."""

    @pytest.mark.parametrize("invalid_input", ["not bytes", 4])
    def test_raises_error_on_non_bytes(self, invalid_input):
        """Anything other than bytes must be rejected with TypeError."""
        with pytest.raises(TypeError):
            hc_helper.maybe_b64decode(invalid_input)

    @pytest.mark.parametrize(
        "in_data,expected",
        (
            # Input that is not base64 comes back unchanged.
            (b"this is my data", b"this is my data"),
            # Input that is base64 comes back decoded.
            (base64.b64encode(b"data"), b"data"),
        ),
    )
    def test_happy_path(self, in_data, expected):
        """Bytes input is decoded only when it is valid base64."""
        result = hc_helper.maybe_b64decode(in_data)
        assert expected == result
22 changes: 22 additions & 0 deletions tests/unittests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -3137,3 +3137,25 @@ class TestComparePermissions:
)
def test_compare_permissions(self, perm1, perm2, expected):
assert util.compare_permission(perm1, perm2) == expected


class TestMaybeB64Decode:
    """Exercise the maybe_b64decode helper."""

    @pytest.mark.parametrize("invalid_input", ["not bytes", 4])
    def test_raises_error_on_non_bytes(self, invalid_input):
        """Anything other than bytes must be rejected with TypeError."""
        with pytest.raises(TypeError):
            util.maybe_b64decode(invalid_input)

    @pytest.mark.parametrize(
        "in_data,expected",
        (
            # Input that is not base64 comes back unchanged.
            (b"this is my data", b"this is my data"),
            # Input that is base64 comes back decoded.
            (base64.b64encode(b"data"), b"data"),
        ),
    )
    def test_happy_path(self, in_data, expected):
        """Bytes input is decoded only when it is valid base64."""
        result = util.maybe_b64decode(in_data)
        assert expected == result
1 change: 1 addition & 0 deletions tools/.github-cla-signers
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ netcho
nicolasbock
nishigori
nkukard
nmeyerhans
olivierlemasle
omBratteng
onitake
Expand Down

0 comments on commit 6588373

Please sign in to comment.