Skip to content

Commit

Permalink
Preserve newlines when removing comments (#43)
Browse files Browse the repository at this point in the history
  • Loading branch information
Schamper authored Aug 4, 2023
1 parent 38d332f commit f9a9f86
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 4 deletions.
8 changes: 4 additions & 4 deletions dissect/cstruct/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,11 +299,11 @@ def _remove_comments(string: str) -> str:
# second group captures comments (//single-line or /* multi-line */)
regex = re.compile(pattern, re.MULTILINE | re.DOTALL)

def _replacer(match):
def _replacer(match: re.Match) -> str:
# if the 2nd group (capturing comments) is not None,
# it means we have captured a non-quoted (real) comment string.
if match.group(2) is not None:
return "" # so we will return empty to remove the comment
if comment := match.group(2):
return "\n" * comment.count("\n") # drop the comment text but keep its newlines so later line numbers stay correct
else: # otherwise, we will return the 1st group
return match.group(1) # captured quoted-string

Expand All @@ -314,7 +314,7 @@ def _lineno(tok: Token) -> int:
"""Quick and dirty line number calculator"""

match = tok.match
return match.string.count("\n", 0, match.start())
return match.string.count("\n", 0, match.start()) + 1

def _config_flag(self, tokens: TokenConsumer) -> None:
flag_token = tokens.consume()
Expand Down
26 changes: 26 additions & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from unittest.mock import Mock

from dissect.cstruct.parser import TokenParser


def test_preserve_comment_newlines():
    """Check that stripping comments leaves the line numbers of later tokens intact.

    Both a single-line and a multi-line comment precede an anchor
    ``#define``; after comment removal each anchor must still be reported
    on the line it occupied in the original definition text.
    """
    source = """
// normal comment
#define normal_anchor
/*
* Multi
* line
* comment
*/
#define multi_anchor
"""
    stripped = TokenParser._remove_comments(source)
    print(repr(stripped))

    # A stand-in token whose match object points into the stripped text.
    token = Mock()
    token.match.string = stripped

    # (anchor text, line number expected from TokenParser._lineno)
    expectations = (
        ("#define normal_anchor", 3),
        ("#define multi_anchor", 9),
    )
    for anchor, expected_line in expectations:
        token.match.start.return_value = stripped.index(anchor)
        assert TokenParser._lineno(token) == expected_line

0 comments on commit f9a9f86

Please sign in to comment.