From 620c0d847dad3f5d2ea5ff2ba23f6df57b3f2889 Mon Sep 17 00:00:00 2001 From: "Kirk.Sayre" Date: Mon, 8 Mar 2021 16:38:17 -0600 Subject: [PATCH 1/4] Correctly report extended ASCII characters in strings. --- oletools/olevba.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/oletools/olevba.py b/oletools/olevba.py index e413d023a..0cb72989b 100644 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -388,10 +388,11 @@ def unicode2str(unicode_string): :return: the string converted to str :rtype: str """ - if PYTHON2: - return unicode_string.encode('utf8', errors='replace') - else: - return unicode_string + # Unicode conversion does nasty things to VBA extended ASCII + # characters. VBA payload decode routines work correctly with the + # raw byte values in payload strings in the decompressed VBA, so leave + # strings alone. + return unicode_string def bytes2str(bytes_string, encoding='utf8'): @@ -2082,9 +2083,23 @@ def decode_bytes(self, bytes_string, errors='replace'): :param errors: str, mode to handle unicode conversion errors :return: str/unicode, decoded string """ - return bytes_string.decode(self.codec, errors=errors) - + # Unicode conversion does nasty things to VBA extended ASCII + # characters. VBA payload decode routines work correctly with the + # raw byte values in payload strings in the decompressed VBA, so leave + # strings alone. + s = "" + in_str = False + for b in bytes_string: + # Track if we are in a string. + if (b == '"'): + in_str = not in_str + # Empirically looks like '\n' may be escaped in strings like this. 
+ if ((b == "\n") and in_str): + s += chr(0x85) + continue + s += b + return s def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=True): """ @@ -4589,6 +4604,7 @@ def main(cmd_line_args=None): sys.exit(return_code) if __name__ == '__main__': + print("MODIFIED!!") main() # This was coded while listening to "Dust" from I Love You But I've Chosen Darkness From 15ea94430705eb975c70c93533ffd5b029502645 Mon Sep 17 00:00:00 2001 From: "Kirk.Sayre" Date: Tue, 9 Mar 2021 11:55:47 -0600 Subject: [PATCH 2/4] Target raw string usage to just decompressed VBA code. --- oletools/olevba.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/oletools/olevba.py b/oletools/olevba.py index 0cb72989b..727853993 100644 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -388,11 +388,10 @@ def unicode2str(unicode_string): :return: the string converted to str :rtype: str """ - # Unicode conversion does nasty things to VBA extended ASCII - # characters. VBA payload decode routines work correctly with the - # raw byte values in payload strings in the decompressed VBA, so leave - # strings alone. - return unicode_string + if PYTHON2: + return unicode_string.encode('utf8', errors='replace') + else: + return unicode_string def bytes2str(bytes_string, encoding='utf8'): @@ -1634,10 +1633,12 @@ def __init__(self, project, dir_stream, module_index): code_data = decompress_stream(bytearray(code_data)) # store the raw code encoded as bytes with the project's code page: self.code_raw = code_data - # decode it to unicode: - self.code = project.decode_bytes(code_data) - # also store a native str version: - self.code_str = unicode2str(self.code) + # Unicode conversion does nasty things to VBA extended ASCII + # characters. VBA payload decode routines work correctly with the + # raw byte values in payload strings in the decompressed VBA, so leave + # strings alone. 
+ self.code = project.fix_bytes(code_data) + self.code_str = self.code # case-insensitive search in the code_modules dict to find the file extension: filext = self.project.module_ext.get(self.name.lower(), 'vba') self.filename = u'{0}.{1}'.format(self.name, filext) @@ -2083,11 +2084,14 @@ def decode_bytes(self, bytes_string, errors='replace'): :param errors: str, mode to handle unicode conversion errors :return: str/unicode, decoded string """ - - # Unicode conversion does nasty things to VBA extended ASCII - # characters. VBA payload decode routines work correctly with the - # raw byte values in payload strings in the decompressed VBA, so leave - # strings alone. + return bytes_string.decode(self.codec, errors=errors) + + def fix_bytes(self, bytes_string): + """ + Change the escaping (value) of a few characters in decompressed VBA code. + :param bytes_string: bytes, bytes string to be fixed + :return: bytes, fixed string + """ s = "" in_str = False for b in bytes_string: From 6bd82b2fded28647caddf983e72c596ffb4ccde1 Mon Sep 17 00:00:00 2001 From: "Kirk.Sayre" Date: Tue, 9 Mar 2021 14:26:15 -0600 Subject: [PATCH 3/4] Removed debug print statement. --- oletools/olevba.py | 1 - 1 file changed, 1 deletion(-) diff --git a/oletools/olevba.py b/oletools/olevba.py index 727853993..351f1965a 100644 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -4608,7 +4608,6 @@ def main(cmd_line_args=None): sys.exit(return_code) if __name__ == '__main__': - print("MODIFIED!!") main() # This was coded while listening to "Dust" from I Love You But I've Chosen Darkness From 49473dabe752a59e5a061a10dcee485e231c54e8 Mon Sep 17 00:00:00 2001 From: "Kirk.Sayre" Date: Fri, 19 Mar 2021 11:58:36 -0500 Subject: [PATCH 4/4] Fix some bad characters in decoded VBA. 
--- oletools/olevba.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/oletools/olevba.py b/oletools/olevba.py index 351f1965a..f7f3a236f 100644 --- a/oletools/olevba.py +++ b/oletools/olevba.py @@ -2092,6 +2092,8 @@ def fix_bytes(self, bytes_string): :param bytes_string: bytes, bytes string to be fixed :return: bytes, fixed string """ + if ('"' not in bytes_string): + return bytes_string s = "" in_str = False for b in bytes_string: @@ -2103,6 +2105,7 @@ def fix_bytes(self, bytes_string): s += chr(0x85) continue s += b + s = s.replace("\n" + chr(0x85), "\n") return s def _extract_vba(ole, vba_root, project_path, dir_path, relaxed=True):