Skip to content

Commit

Permalink
More string decoding fixing
Browse files Browse the repository at this point in the history
  • Loading branch information
AngheloAlf committed Apr 26, 2024
1 parent de4a2ef commit ca7a688
Showing 1 changed file with 34 additions and 40 deletions.
74 changes: 34 additions & 40 deletions spimdisasm/common/Utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,12 +250,6 @@ def getMaybeBooleyFromMaybeStr(booley: str|None) -> bool|None:
0x8D,
}

escapeCharactersMaybeRealLookAhead = {
0x8C,
0x8D,
0xC9,
}

def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminator: int=0) -> tuple[list[str], int]:
result = []

Expand All @@ -266,48 +260,48 @@ def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminato
if char in bannedEscapeCharacters:
return [], -10

# Some of the escape characters are real Japanese characters, so we need to properly check them
theEscapeCharacterWasARealChar = False
if char in escapeCharactersMaybeRealLookAhead:
validNonAsciiSequence = False
if char > 0x7F:
dst.append(char)
try:
decoded = dst.decode(stringEncoding)
theEscapeCharacterWasARealChar = True
validNonAsciiSequence = True
except UnicodeDecodeError:
if offset + i + 1 < len(buf):
nextChar = buf[offset + i + 1]
dst.append(nextChar)
try:
decoded = dst.decode(stringEncoding)
theEscapeCharacterWasARealChar = True
except UnicodeDecodeError:
pass
dst.pop()
if nextChar != 0x5C: # '\\'
dst.append(nextChar)
try:
decoded = dst.decode(stringEncoding)
validNonAsciiSequence = True
except UnicodeDecodeError:
pass
dst.pop()
dst.pop()

if not theEscapeCharacterWasARealChar and char > 0x7F and offset + i + 1 < len(buf):
nextChar = buf[offset + i + 1]
if nextChar == 0x5C: # '\\'
# If the second part of a Japanese character is the 0x5C value ('\\') then we need to
# special handle it. Otherwise when it gets iconv'd then the compiler will get confused
# and think it should try to escape the next character instead.
# So we break down the string here, add this two characters as individual characters and
# we skip them

if dst:
try:
decoded = dst.decode(stringEncoding)
except UnicodeDecodeError:
return [], -60
result.append(rabbitizer.Utils.escapeString(decoded))
dst.clear()
result.append(f"\\x{char:02X}")
result.append(f"\\x{nextChar:02X}")

i += 2
continue

if not theEscapeCharacterWasARealChar and char in escapeCharactersSpecialCases:
if not validNonAsciiSequence and offset + i + 1 < len(buf):
nextChar = buf[offset + i + 1]
if nextChar == 0x5C: # '\\'
# If the second byte of a Japanese character is the 0x5C value ('\\') then we need to
# handle it specially. Otherwise, when it gets iconv'd, the compiler gets confused
# and thinks it should try to escape the next character instead.
# So we break down the string here, add these two characters as individual characters and
# skip them

if dst:
try:
decoded = dst.decode(stringEncoding)
except UnicodeDecodeError:
return [], -60
result.append(rabbitizer.Utils.escapeString(decoded))
dst.clear()
result.append(f"\\x{char:02X}")
result.append(f"\\x{nextChar:02X}")

i += 2
continue

if not validNonAsciiSequence and char in escapeCharactersSpecialCases:
if dst:
try:
decoded = dst.decode(stringEncoding)
Expand Down

0 comments on commit ca7a688

Please sign in to comment.