More string decoding fixing

Decompollaborate · Apr 26, 2024 · ca7a688 · ca7a688
1 parent de4a2ef
commit ca7a688
Showing 1 changed file with 34 additions and 40 deletions.
diff --git a/spimdisasm/common/Utils.py b/spimdisasm/common/Utils.py
@@ -250,12 +250,6 @@ def getMaybeBooleyFromMaybeStr(booley: str|None) -> bool|None:
     0x8D,
 }
 
-escapeCharactersMaybeRealLookAhead = {
-    0x8C,
-    0x8D,
-    0xC9,
-}
-
 def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminator: int=0) -> tuple[list[str], int]:
     result = []
 
@@ -266,48 +260,48 @@ def decodeBytesToStrings(buf: bytes, offset: int, stringEncoding: str, terminato
         if char in bannedEscapeCharacters:
             return [], -10
 
-        # Some of the escape characters are real Japanese characters, so we need to properly check them
-        theEscapeCharacterWasARealChar = False
-        if char in escapeCharactersMaybeRealLookAhead:
+        validNonAsciiSequence = False
+        if char > 0x7F:
             dst.append(char)
             try:
                 decoded = dst.decode(stringEncoding)
-                theEscapeCharacterWasARealChar = True
+                validNonAsciiSequence = True
             except UnicodeDecodeError:
                 if offset + i + 1 < len(buf):
                     nextChar = buf[offset + i + 1]
-                    dst.append(nextChar)
-                    try:
-                        decoded = dst.decode(stringEncoding)
-                        theEscapeCharacterWasARealChar = True
-                    except UnicodeDecodeError:
-                        pass
-                    dst.pop()
+                    if nextChar != 0x5C: # '\\'
+                        dst.append(nextChar)
+                        try:
+                            decoded = dst.decode(stringEncoding)
+                            validNonAsciiSequence = True
+                        except UnicodeDecodeError:
+                            pass
+                        dst.pop()
             dst.pop()
 
-        if not theEscapeCharacterWasARealChar and char > 0x7F and offset + i + 1 < len(buf):
-            nextChar = buf[offset + i + 1]
-            if nextChar == 0x5C: # '\\'
-                # If the second part of a Japanese character is the 0x5C value ('\\') then we need to
-                # special handle it. Otherwise when it gets iconv'd then the compiler will get confused
-                # and think it should try to escape the next character instead.
-                # So we break down the string here, add this two characters as individual characters and
-                # we skip them
-
-                if dst:
-                    try:
-                        decoded = dst.decode(stringEncoding)
-                    except UnicodeDecodeError:
-                        return [], -60
-                    result.append(rabbitizer.Utils.escapeString(decoded))
-                    dst.clear()
-                result.append(f"\\x{char:02X}")
-                result.append(f"\\x{nextChar:02X}")
-
-                i += 2
-                continue
-
-        if not theEscapeCharacterWasARealChar and char in escapeCharactersSpecialCases:
+            if not validNonAsciiSequence and offset + i + 1 < len(buf):
+                nextChar = buf[offset + i + 1]
+                if nextChar == 0x5C: # '\\'
+                    # If the second byte of a Japanese character is the 0x5C value ('\\') then we need to
+                    # handle it specially. Otherwise, when it gets iconv'd, the compiler gets confused
+                    # and thinks it should try to escape the next character instead.
+                    # So we break down the string here, add these two characters as individual characters and
+                    # skip them
+
+                    if dst:
+                        try:
+                            decoded = dst.decode(stringEncoding)
+                        except UnicodeDecodeError:
+                            return [], -60
+                        result.append(rabbitizer.Utils.escapeString(decoded))
+                        dst.clear()
+                    result.append(f"\\x{char:02X}")
+                    result.append(f"\\x{nextChar:02X}")
+
+                    i += 2
+                    continue
+
+        if not validNonAsciiSequence and char in escapeCharactersSpecialCases:
             if dst:
                 try:
                     decoded = dst.decode(stringEncoding)