From b762595d728d98fe46524d16cce39d107d0145de Mon Sep 17 00:00:00 2001
From: Philipp Eder
Date: Wed, 17 Jan 2024 09:25:28 +0100
Subject: [PATCH] Refactor: document cmake tokenizer

---
 pontos/version/commands/_cmake.py | 45 ++++++++++++++++++++++++-------
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/pontos/version/commands/_cmake.py b/pontos/version/commands/_cmake.py
index 2f1acc26..e7836631 100644
--- a/pontos/version/commands/_cmake.py
+++ b/pontos/version/commands/_cmake.py
@@ -73,17 +73,44 @@ def __init__(self, cmake_content_lines: str):
         self._project_dev_version_line_number = pd_line_no
         self._project_dev_version = pd
 
+    # Tokenizer for CMake scripts: every rule below maps a regex to a (token_type, text) tuple.
+    # We are interested in words that represent functions, variables, and their values;
+    # specifically the words 'project', 'version', 'set', 'PROJECT_DEV_VERSION' and their
+    # values, as we need to modify them. NOTE(review): rules seem to be tried in list order.
     __cmake_scanner = re.Scanner(  # type: ignore
         [
-            (r"#.*", lambda _, token: ("comment", token)),
-            (r'"[^"]*"', lambda _, token: ("string", token)),
-            (r'"[0-9]+"', lambda _, token: ("number", token)),
-            (r"\(", lambda _, token: ("open_bracket", token)),
-            (r"\)", lambda _, token: ("close_bracket", token)),
-            (r'[^ \t\r\n()#"]+', lambda _, token: ("word", token)),
-            (r"\n", lambda _, token: ("newline", token)),
-            # to have spaces etc correctly
-            (r"\s+", lambda _, token: ("special_printable", token)),
+            (
+                r"#.*",
+                lambda _, token: ("comment", token),
+            ),  # '#' up to the end of the line, so that comments can be skipped
+            (
+                r'"[^"]*"',
+                lambda _, token: ("string", token),
+            ),  # double-quoted text, so that we can verify if a value is a string
+            (
+                r'"[0-9]+"',
+                lambda _, token: ("number", token),
+            ),  # quoted digits; NOTE(review): the "string" rule above matches these too - TODO confirm reachable
+            (
+                r"\(",
+                lambda _, token: ("open_bracket", token),
+            ),  # "(" so that we can identify the start of function calls
+            (
+                r"\)",
+                lambda _, token: ("close_bracket", token),
+            ),  # ")" so that we can identify the end of function calls
+            (
+                r'[^ \t\r\n()#"]+',
+                lambda _, token: ("word", token),
+            ),  # any run free of whitespace, brackets, '#' and '"' (identifiers)
+            (
+                r"\n",
+                lambda _, token: ("newline", token),
+            ),  # a single line feed, so that we can keep track of the position
+            (
+                r"\s+",
+                lambda _, token: ("special_printable", token),
+            ),  # any other whitespace run, so that spaces etc. are kept correctly
         ]
     )
 