Skip to content

Commit

Permalink
Fixed writing of cached token as str and removed in token_util.py ext…
Browse files Browse the repository at this point in the history
…ra decoding that was corrupting idtoken

jwt.encode can accept a string as key, but will encode it in utf-8, not latin-1, causing a different value
this caused the subtle error where the IDTOKENs verification was failing.

I removed the encoding, fixed type notation, and
I added also a note in a comment to avoid future changes to repeat the error:
    master_key should be `bytes`. `str` could cause value changes if was decoded not using utf-8.
    The manual (https://pyjwt.readthedocs.io/en/stable/api.html) is incorrect to list `str` only.
    The source code (https://github.com/jpadilla/pyjwt/blob/72ad55f6d7041ae698dc0790a690804118be50fc/jwt/api_jws.py)
    shows `AllowedPrivateKeys | str | bytes` and if it is str, then it is encoded w/ utf-8:  value.encode("utf-8")
  • Loading branch information
mambelli committed Jul 24, 2023
1 parent 308816d commit 69eea68
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 36 deletions.
2 changes: 1 addition & 1 deletion frontend/glideinFrontendElement.py
Original file line number Diff line number Diff line change
Expand Up @@ -1152,7 +1152,7 @@ def refresh_entry_token(self, glidein_el):
# The token file is read as text file below. Writing fixed to be consistent
with tempfile.NamedTemporaryFile(mode="w", delete=False, dir=tkn_dir) as fd:
os.chmod(fd.name, 0o600)
fd.write(tkn_str.encode())
fd.write(tkn_str)
os.replace(fd.name, tkn_file)
logSupport.log.debug("created token %s" % tkn_file)
elif os.path.exists(tkn_file):
Expand Down
3 changes: 2 additions & 1 deletion lib/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,12 @@ def force_bytes(instr, encoding=BINARY_ENCODING_CRYPTO):
ValueError: if it detects an improper str conversion (b'' around the string)
"""
if isinstance(instr, str):
# raise Exception("ALREADY str!")
# raise Exception("ALREADY str!") # Use this for investigations
if instr.startswith("b'"):
raise ValueError(
"Input was improperly converted into string (resulting in b'' characters added): %s" % instr
)
# If the encoding is known codecs can be used for more efficiency, e.g. codecs.latin_1_encode(x)[0]
return instr.encode(encoding)
return instr

Expand Down
73 changes: 39 additions & 34 deletions lib/token_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# This is a collection of utility functions for HTCondor IDTOKEN generation


import codecs
import os
import re
import socket
Expand Down Expand Up @@ -77,56 +78,54 @@ def token_str_expired(token_str):
return expired


def simple_scramble(data):
def simple_scramble(in_buf):
"""Undo the simple scramble of HTCondor
simply XOR with 0xdeadbeef
Using defaults.BINARY_ENCODING (latin-1) to have a 1-to-1 characted-byte correspondence
Source: https://github.com/CoffeaTeam/jhub/blob/master/charts/coffea-casa-jhub/files/hub/auth.py#L196-L235
simply XOR with 0xdeadbeef
Source: https://github.com/CoffeaTeam/coffea-casa/blob/master/charts/coffea-casa/files/hub-extra/auth.py
Args:
data(bytearray): binary string to be unscrambled
Returns:
bytearray: an HTCondor scrambled binary string
"""
outb = "".encode(defaults.BINARY_ENCODING)
deadbeef = [0xDE, 0xAD, 0xBE, 0xEF]
ldata = len(data)
lbeef = len(deadbeef)
for i in range(ldata):
if sys.version_info[0] == 2:
datum = struct.unpack("B", data[i])[0]
else:
datum = data[i]
rslt = datum ^ deadbeef[i % lbeef]
b1 = struct.pack("H", rslt)[0]
outb += ("%c" % b1).encode(defaults.BINARY_ENCODING)
return outb
DEADBEEF = (0xDE, 0xAD, 0xBE, 0xEF)
out_buf = b""
for idx in range(len(in_buf)):
scramble = in_buf[idx] ^ DEADBEEF[idx % 4] # 4 = len(DEADBEEF)
out_buf += b"%c" % scramble
return out_buf


def derive_master_key(password):
"""Derive an encryption/decryption key
Source: https://github.com/CoffeaTeam/jhub/blob/master/charts/coffea-casa-jhub/files/hub/auth.py#L196-L235
Source: https://github.com/CoffeaTeam/coffea-casa/blob/master/charts/coffea-casa/files/hub-extra/auth.py
Args:
password(bytes): an unscrambled HTCondor password (bytes-like: bytes, bytearray, memoryview)
Returns:
str: an HTCondor encryption/decryption key
bytes: an HTCondor encryption/decryption key
"""

# Key length, salt, and info fixed as part of protocol
# Key length, salt, and info are fixed as part of the protocol
# Here the types and meaning from cryptography.hazmat.primitives.kdf.hkdf:
# HKDF.__init__
# Aalgorithm – An instance of HashAlgorithm.
# length(int) – key length in bytes
# salt(bytes) – To randomize
# info(bytes) – Application data
hkdf = HKDF(
algorithm=hashes.SHA256(),
length=32,
salt="htcondor".encode(defaults.BINARY_ENCODING),
info="master jwt".encode(defaults.BINARY_ENCODING),
salt=b"htcondor",
info=b"master jwt",
backend=default_backend(),
)
# HKDF.derive() requires bytes and returns bytes
return hkdf.derive(password).decode(defaults.BINARY_ENCODING)
return hkdf.derive(password)


def sign_token(identity, issuer, kid, master_key, duration=None, scope=None):
Expand All @@ -136,16 +135,15 @@ def sign_token(identity, issuer, kid, master_key, duration=None, scope=None):
identity(str): who the token was generated for
issuer(str): idtoken issuer, typically HTCondor Collector
kid(str): Key ID
master_key(str): encryption key
master_key(bytes): encryption key
duration(int, optional): number of seconds IDTOKEN is valid. Default: infinity
scope(str, optional): permissions IDTOKEN has. Default: everything
Returns:
str: a signed IDTOKEN
str: a signed IDTOKEN (jwt token)
"""

iat = int(time.time())

payload = {
"sub": identity,
"iat": iat,
Expand All @@ -158,6 +156,10 @@ def sign_token(identity, issuer, kid, master_key, duration=None, scope=None):
payload["exp"] = exp
if scope:
payload["scope"] = scope
# master_key should be `bytes`. `str` could cause value changes if was decoded not using utf-8.
# The manual (https://pyjwt.readthedocs.io/en/stable/api.html) is incorrect to list `str` only.
# The source code (https://github.com/jpadilla/pyjwt/blob/72ad55f6d7041ae698dc0790a690804118be50fc/jwt/api_jws.py)
# shows `AllowedPrivateKeys | str | bytes` and if it is str, then it is encoded w/ utf-8: value.encode("utf-8")
encoded = jwt.encode(payload, master_key, algorithm="HS256", headers={"kid": kid})
return encoded

Expand Down Expand Up @@ -185,7 +187,6 @@ def create_and_sign_token(pwd_file, issuer=None, identity=None, kid=None, durati
if not kid:
kid = os.path.basename(pwd_file)
if not issuer:
# split() has been added because condor is only considering the first part. Here is
# As of Oct 2022
# TRUST_DOMAIN is an opaque string to be taken as it is (Brian B.), but for tokens only the first collector
# is considered in the TRUST_DOMAIN (TJ, generate_token HTCSS code):
Expand All @@ -201,30 +202,34 @@ def create_and_sign_token(pwd_file, issuer=None, identity=None, kid=None, durati
# TRUST_DOMAIN=vocms0803.cern.ch:9618
# TRUST_DOMAIN=vocms0803.cern.ch:9618,Some Random Text
# are all considered the same - vocms0803.cern.ch:9618."
full_issuer = iexe_cmd("condor_config_val TRUST_DOMAIN").strip()
full_issuer = iexe_cmd("condor_config_val TRUST_DOMAIN").strip() # Remove trailing spaces and newline
if not full_issuer:
logSupport.log.warning(
"Unable to retrieve TRUST_DOMAIN and no issuer provided: token will have empty 'iss'"
)
else:
# to set the issuer TRUST_DOMAIN is split no matter whether coming from COLLECTOR_HOST or not
# To set the issuer TRUST_DOMAIN is split no matter whether coming from COLLECTOR_HOST or not
# Using the same splitting as creation/web_base/setup_x509.sh
# is_default_trust_domain = "# at: <Default>" in iexe_cmd("condor_config_val -v TRUST_DOMAIN")
split_issuers = re.split(" |,|\t", full_issuer) # get only the first collector
# re.split(r":|\?", split_issuers[0]) would remove also synful string and port (to have the same tring for secondary collectors, but not needed)
issuer = split_issuers[0]
if not identity:
identity = f"{os.getlogin()}@{socket.gethostname()}"

with open(pwd_file, "rb") as fd:
data = fd.read()
master_key = derive_master_key(simple_scramble(data))
password = simple_scramble(data)
# The POOL password requires a special handling
# Done in https://github.com/CoffeaTeam/coffea-casa/blob/master/charts/coffea-casa/files/hub-extra/auth.py#L252
if kid == "POOL":
password += password
master_key = derive_master_key(password)
return sign_token(identity, issuer, kid, master_key, duration, scope)


# to test: need htcondor password file (for example el7_osg34)
# python token_util.py el7_osg34 $HOSTNAME:9618 vofrontend_service@$HOSTNAME
# will output condor IDTOKEN to stdout - use condor_ping to verify/validate
# To test you need htcondor password file
# python3 token_util.py <condor_password_file_path> $HOSTNAME:9618 vofrontend_service@$HOSTNAME
# will output condor IDTOKEN to stdout - use condor_ping to the server to verify/validate
if __name__ == "__main__":
kid = sys.argv[1]
issuer = sys.argv[2]
Expand Down

0 comments on commit 69eea68

Please sign in to comment.