From 797f221ab0fc7cfce7255bbd4a2d1544000e4c85 Mon Sep 17 00:00:00 2001
From: Sebastian Simon
Date: Fri, 11 Oct 2024 16:18:37 +0200
Subject: [PATCH] Revise config type inferer

---
Review notes (commentary only; `git am` ignores everything up to the diff):

* The line layout of this patch was restored from a whitespace-collapsed
  copy. Added/removed lines and hunk sizes were cross-checked against the
  diffstat below; wrapping, indentation and blank lines inside unchanged
  context are a best guess, and the "index" blob ids predate these edits.
  TODO confirm against the repository before applying.
* is_port: the upper bound is 65535 (was 665535).
* get_config_type: removed the leftover print(results), declared the
  ConfigType return type and documented the precedence rules.
* test_dataset: the dark_mode/safe_mode cases used "on"/"off", which
  get_config_type reports as BOOLEAN before any name-based type; the
  "Miscellaneous" cases expected STATE/LANGUAGE/HOST/CLASS, which no check
  in this patch returns. Values and expectations now match the inferer.
* Open question: is_boolean/get_config_type were static methods before.
  Callers outside this patch that still use the class-level form
  ConfigTypeInferer.get_config_type(...) would now fail -- TODO confirm.

 .../config_types/config_type_inferer.py       | 397 +++++++++++-------
 src/cfgnet/linker/equality_linker.py          |   2 +-
 src/cfgnet/linker/linker.py                   |   2 +
 src/cfgnet/plugins/concept/docker_plugin.py   |   3 +-
 src/cfgnet/plugins/file_type/hadoop_plugin.py |   9 +-
 src/cfgnet/plugins/plugin.py                  |   2 +
 .../config_types/test_config_type_inferer.py  | 159 +++++--
 7 files changed, 380 insertions(+), 194 deletions(-)

diff --git a/src/cfgnet/config_types/config_type_inferer.py b/src/cfgnet/config_types/config_type_inferer.py
index c729ca0e..c9545f76 100644
--- a/src/cfgnet/config_types/config_type_inferer.py
+++ b/src/cfgnet/config_types/config_type_inferer.py
@@ -13,8 +13,13 @@
 # You should have received a copy of the GNU General Public License along with
 # this program. If not, see .
 import re
+from typing import Tuple
+from enum import Enum
 from cfgnet.config_types.config_types import ConfigType
 
+class Confidence(Enum):
+    HIGH = 2  # option name and value agree on the type
+    LOW = 1  # only one of option name or value hints at the type
 
 # pylint: disable=too-many-public-methods
 class ConfigTypeInferer:
@@ -31,8 +36,7 @@ class ConfigTypeInferer:
     2. Second, the option name or value are checked against specific types for
     which exist only one regular expression.
     3. Lastly, the option name or value checked against general types.
-    """
-
+    """
     regex_password_option = re.compile(r"password|pwd|pass")
     regex_password_value = re.compile(r".+")
 
@@ -41,25 +45,21 @@ class ConfigTypeInferer:
         r"([1-9][0-9]{0,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])"
     )
 
-    regex_size_option = re.compile(
-        r"size|length|max|min|threshold|weight|height|memory|mem|byte|mb"
-    )
-    regex_size_value = re.compile(r"(\d)+ ?(B|KB|MB|GB|TB|PB)?")
+    regex_size_option = re.compile(r"size|length|memory")
+    regex_size_value = re.compile(r"(\d)+ ?(B|KB|MB|GB|TB|PB)")
 
     regex_username_option = re.compile(r"user|usr|username")
     regex_username_value = re.compile(r"[a-zA-Z][a-zA-Z0-9_]+")
 
-    regex_time_option = re.compile(
-        r"time|interval|day|month|year|hour|minute|second|millisecond"
-    )
+    regex_time_option = re.compile(r"time|timeout|interval|delay|duration")
     regex_time_value = re.compile(r"[\d]+ ?(s|min|h|d|ms)*")
 
-    regex_filepath_option = re.compile(
-        r"path|dir|directory|folder|destination|root"
-    )
-    # regex_filepath_value = re.compile(r"\/?([^\/]+\/)+[^\/]*")
+    regex_filepath_option = re.compile(r"path|dir|directory|folder|root")
     regex_filepath_value = re.compile(r"^([~.\w\d]*\/[.\w\d]+)+(\.[\w\d]+)*$")
 
+    regex_filename_option = re.compile(r"file")
+    regex_filename_value= re.compile(r"^[a-zA-Z0-9_\-]+(\.[a-zA-Z0-9_\-]+)*\.[a-zA-Z0-9]{1,6}$")
+
     regex_version_number_option = re.compile(r"version|target|source")
     regex_version_number_value = re.compile(
         r"^(\^|~)?(?:[0-9]{1,3}\.){2}[0-9]{1,3}(-[\w]+)?$"
@@ -68,141 +68,244 @@ class ConfigTypeInferer:
     regex_ip_address_option = re.compile(r"address|ip")
     regex_ip_address_value = re.compile(r"^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$")
 
-    regex_domain_main = re.compile(
-        r"(telnet|https|http|ftp)+:\/\/(\w)+((\.\w+)+):[\d]+"
-    )
-    regex_url = re.compile(r"(https|http)+:\/\/.*")
-    regex_boolean = re.compile(
-        r"[tT][rR][uU][eE]|[fF][aA][lL][sS][eE]|[oO][nN]|[oO][fF]{2}|[yY][eE][sS]|[nN][oO]"
-    )
-    regex_filename = re.compile(r"\/?[a-zA-z_-]+\.[a-zA-z_-]+")
-    regex_email = re.compile(r"^(\w)+(\.\w+)*@(\w)+((\.\w+)+)")
-    regex_speed = re.compile(r"[\d]+ ?(bps|Mbps|Kbps)")
-    regex_number = re.compile(r"[\d.]+")
-    regex_id = re.compile(r"identity|identifier|id")
-    regex_name = re.compile(r"name|alias")
-    regex_mode = re.compile(r"mode")
-    regex_count = re.compile(r"count")
-    regex_pattern = re.compile(r"match|pattern")
-    regex_environment = re.compile(r"env|environment")
-    regex_image = re.compile(r"image")
-    regex_command = re.compile(r"command|entrypoint|cmd|script|bin|install")
-    regex_license = re.compile(r"license")
-
-    @staticmethod
-    def is_boolean(value: str) -> bool:
-        return bool(re.fullmatch(ConfigTypeInferer.regex_boolean, value))
-
-    # pylint: disable=too-many-return-statements
-    @staticmethod
-    def get_config_type(  # noqa: C901
-        option_name: str, value: str
-    ) -> ConfigType:
-        """Check the option value and return its config type."""
-        # Check option name and value against types for which an option name and value regex exists.
-        option_name = option_name.split(".")[-1]
-
-        if ConfigTypeInferer.is_boolean(value):
+    regex_url_option = re.compile(r"url|link")
+    regex_url_value= re.compile(r"(https|http)+:\/\/.*")
+
+    regex_email_option = re.compile(r"email|mail")
+    regex_email_value = re.compile(r"^(\w)+(\.\w+)*@(\w)+((\.\w+)+)")
+
+    regex_speed_option = re.compile(r"speed|rate")
+    regex_speed_value = re.compile(r"[\d]+ ?(bps|Mbps|Kbps)")
+
+    regex_number_value = re.compile(r"[\d.]+")
+
+    regex_id_option = re.compile(r"identifier|id|token|key")
+    regex_id_value = re.compile(r"[a-z0-9-_.]+")
+
+    regex_name_option = re.compile(r"name|alias|label")
+    regex_mode_option = re.compile(r"mode|enable|disable|flag|switch|active|inactive")
+    regex_count_option = re.compile(r"count|total|number|limit|max|min")
+    regex_image_option = re.compile(r"image")
+    regex_command_option = re.compile(r"command|entrypoint|cmd|script|bin|install|run")
+    regex_license_option = re.compile(r"license")
+
+    def is_number(self, value: str) -> bool:
+        try:
+            float(value)
+            return True
+        except ValueError:
+            return False
+
+    def is_boolean(self, value: str) -> bool:
+        return value.lower() in ['true', 'false', '1', '0', 'yes', 'no', 'on', 'off']
+
+    def is_username(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_username_option, option_name) and re.fullmatch(self.regex_username_value, value):
+            return True, Confidence.HIGH
+
+        if re.search(self.regex_username_option, option_name):
+            return True, Confidence.LOW
+
+        return False, None
+
+    def is_count(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_count_option, option_name) and self.is_number(value):
+            return True, Confidence.HIGH
+
+        if re.search(self.regex_count_option, option_name):
+            return True, Confidence.LOW
+
+        return False, None
+
+    def is_port(self, option_name: str, value: str) -> Tuple:
+        try:
+            port_value = int(value)
+            is_port_value = 0 <= port_value <= 65535
+            if is_port_value:
+                if re.search(self.regex_port_option, option_name):
+                    return True, Confidence.HIGH
+                return True, Confidence.LOW
+            return False, None
+        except ValueError:
+            return False, None
+
+    def is_size(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_size_option, option_name) and re.fullmatch(self.regex_size_value, value):
+            return True, Confidence.HIGH
+
+        if re.search(self.regex_size_option, option_name) and self.is_number(value):
+            return True, Confidence.HIGH
+
+        if re.fullmatch(self.regex_size_value, value) or re.search(self.regex_size_option, option_name):
+            return True, Confidence.LOW
+
+        return False, None
+
+    def is_time(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_time_option, option_name) and re.fullmatch(self.regex_time_value, value):
+            return True, Confidence.HIGH
+
+        if re.search(self.regex_time_option, option_name) and self.is_number(value):
+            return True, Confidence.HIGH
+
+        if re.search(self.regex_time_option, option_name):
+            return True, Confidence.LOW
+
+        return False, None
+
+    def is_password(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_password_option, option_name) and re.fullmatch(self.regex_password_value, value):
+            return True, Confidence.HIGH
+
+        if re.search(self.regex_password_option, option_name):
+            return True, Confidence.LOW
+
+        return False, None
+
+    def is_path(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_filepath_option, option_name) and re.fullmatch(self.regex_filepath_value, value):
+            return True, Confidence.HIGH
+
+        if re.search(self.regex_filepath_option, option_name) or re.fullmatch(self.regex_filepath_value, value):
+            return True, Confidence.LOW
+
+        if re.search(self.regex_filename_option, option_name) and re.fullmatch(self.regex_filename_value, value):
+            return True, Confidence.HIGH
+
+        return False, None
+
+    def is_version_number(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_version_number_option, option_name) and re.fullmatch(self.regex_version_number_value, value):
+            return True, Confidence.HIGH
+
+        if re.search(self.regex_version_number_option, option_name) or re.fullmatch(self.regex_version_number_value, value):
+            return True, Confidence.LOW
+
+        return False, None
+
+    def is_ip_address(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_ip_address_option, option_name) and re.fullmatch(self.regex_ip_address_value, value):
+            return True, Confidence.HIGH
+
+        if re.fullmatch(self.regex_ip_address_value, value) or re.search(self.regex_ip_address_option, option_name):
+            return True, Confidence.LOW
+
+        return False, None
+
+    def is_email(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_email_option, option_name) and re.fullmatch(self.regex_email_value, value):
+            return True, Confidence.HIGH
+
+        if re.fullmatch(self.regex_email_value, value) or re.search(self.regex_email_option, option_name):
+            return True, Confidence.LOW
+
+        return False, None
+
+    def is_speed(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_speed_option, option_name) and re.fullmatch(self.regex_speed_value, value):
+            return True, Confidence.HIGH
+
+        if re.search(self.regex_speed_option, option_name) and self.is_number(value):
+            return True, Confidence.HIGH
+
+        if re.fullmatch(self.regex_speed_value, value) or re.search(self.regex_speed_option, option_name):
+            return True, Confidence.LOW
+
+        return False, None
+
+    def is_url(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_url_option, option_name) and re.fullmatch(self.regex_url_value, value):
+            return True, Confidence.HIGH
+
+        if re.fullmatch(self.regex_url_value, value) or re.search(self.regex_url_option, option_name):
+            return True, Confidence.LOW
+
+        return False, None
+
+    def is_id(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_id_option, option_name) and re.fullmatch(self.regex_id_value, value):
+            return True, Confidence.HIGH
+
+        if re.search(self.regex_id_option, option_name):
+            return True, Confidence.LOW
+
+        return False, None
+
+    def is_mode(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_mode_option, option_name):
+            return True, Confidence.LOW
+        return False, None
+
+    def is_image(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_image_option, option_name):
+            return True, Confidence.LOW
+
+        return False, None
+
+    def is_command(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_command_option, option_name):
+            return True, Confidence.LOW
+
+        return False, None
+
+    def is_license(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_license_option, option_name):
+            return True, Confidence.LOW
+
+        return False, None
+
+    def is_name(self, option_name: str, value: str) -> Tuple:
+        if re.search(self.regex_name_option, option_name):
+            return True, Confidence.LOW
+
+        return False, None
+
+
+    def get_config_type(self, option_name: str, value: str) -> ConfigType:
+        """
+        Return BOOLEAN for boolean values, else the highest-confidence match,
+        else NUMBER for numeric values, else UNKNOWN.
+        """
+        results = []
+
+        # Each check returns (matched, confidence) for one candidate type.
+        checks = [
+            (self.is_password, ConfigType.PASSWORD),
+            (self.is_port, ConfigType.PORT),
+            (self.is_size, ConfigType.SIZE),
+            (self.is_username, ConfigType.USERNAME),
+            (self.is_time, ConfigType.TIME),
+            (self.is_path, ConfigType.PATH),
+            (self.is_version_number, ConfigType.VERSION_NUMBER),
+            (self.is_ip_address, ConfigType.IP_ADDRESS),
+            (self.is_email, ConfigType.EMAIL),
+            (self.is_speed, ConfigType.SPEED),
+            (self.is_url, ConfigType.URL),
+            (self.is_id, ConfigType.ID),
+            (self.is_mode, ConfigType.MODE),
+            (self.is_count, ConfigType.COUNT),
+            (self.is_image, ConfigType.IMAGE),
+            (self.is_command, ConfigType.COMMAND),
+            (self.is_license, ConfigType.LICENSE),
+            (self.is_name, ConfigType.NAME)
+        ]
+
+        for check_method, config_type in checks:
+            matched, confidence = check_method(option_name, value)
+            if matched:
+                results.append((config_type, confidence))
+
+        # Highest confidence first; the sort is stable, so ties keep the order of `checks`.
+        results.sort(key=lambda x: x[1].value, reverse=True)
+
+        # A boolean value takes precedence over every name/value match.
+        if self.is_boolean(value):
             return ConfigType.BOOLEAN
 
-        if bool(
-            re.search(ConfigTypeInferer.regex_port_option, option_name)
-        ) and bool(re.fullmatch(ConfigTypeInferer.regex_port_value, value)):
-            return ConfigType.PORT
-
-        if bool(
-            re.search(ConfigTypeInferer.regex_username_option, option_name)
-        ) and bool(
-            re.fullmatch(ConfigTypeInferer.regex_username_value, value)
-        ):
-            return ConfigType.USERNAME
-
-        if bool(
-            re.search(ConfigTypeInferer.regex_size_option, option_name)
-        ) and bool(re.fullmatch(ConfigTypeInferer.regex_size_value, value)):
-            return ConfigType.SIZE
-
-        if bool(
-            re.search(ConfigTypeInferer.regex_time_option, option_name)
-        ) and bool(re.fullmatch(ConfigTypeInferer.regex_time_value, value)):
-            return ConfigType.TIME
-
-        if bool(
-            re.search(ConfigTypeInferer.regex_password_option, option_name)
-        ) and bool(
-            re.fullmatch(ConfigTypeInferer.regex_password_value, value)
-        ):
-            return ConfigType.PASSWORD
-
-        if bool(
-            re.search(ConfigTypeInferer.regex_filepath_option, option_name)
-        ) or bool(re.fullmatch(ConfigTypeInferer.regex_filepath_value, value)):
-            return ConfigType.PATH
-
-        if bool(
-            re.search(
-                ConfigTypeInferer.regex_version_number_option, option_name
-            )
-        ) or bool(
-            re.fullmatch(ConfigTypeInferer.regex_version_number_value, value)
-        ):
-            return ConfigType.VERSION_NUMBER
-
-        if bool(
-            re.search(ConfigTypeInferer.regex_ip_address_option, option_name)
-        ) or bool(
-            re.fullmatch(ConfigTypeInferer.regex_ip_address_value, value)
-        ):
-            return ConfigType.IP_ADDRESS
-
-        # Check option name and value against specific types.
-        if bool(re.fullmatch(ConfigTypeInferer.regex_filename, value)):
-            return ConfigType.PATH
-
-        if bool(re.fullmatch(ConfigTypeInferer.regex_email, value)):
-            return ConfigType.EMAIL
-
-        if bool(re.fullmatch(ConfigTypeInferer.regex_domain_main, value)):
-            return ConfigType.DOMAIN_NAME
-
-        if bool(re.fullmatch(ConfigTypeInferer.regex_speed, value)):
-            return ConfigType.SPEED
-
-        if bool(re.fullmatch(ConfigTypeInferer.regex_url, value)):
-            return ConfigType.URL
-
-        if bool(re.search(ConfigTypeInferer.regex_id, option_name)):
-            return ConfigType.ID
-
-        if bool(re.search(ConfigTypeInferer.regex_mode, option_name)):
-            return ConfigType.MODE
-
-        if bool(re.search(ConfigTypeInferer.regex_count, option_name)):
-            return ConfigType.COUNT
-
-        if bool(re.search(ConfigTypeInferer.regex_name, option_name)):
-            return ConfigType.NAME
-
-        if bool(re.search(ConfigTypeInferer.regex_pattern, option_name)):
-            return ConfigType.PATTERN
-
-        if bool(re.search(ConfigTypeInferer.regex_environment, option_name)):
-            return ConfigType.ENVIRONMENT
-
-        if bool(re.search(ConfigTypeInferer.regex_image, option_name)):
-            return ConfigType.IMAGE
-
-        if bool(re.search(ConfigTypeInferer.regex_command, option_name)):
-            return ConfigType.COMMAND
-
-        if bool(re.search(ConfigTypeInferer.regex_license, option_name)):
-            return ConfigType.LICENSE
-
-        # Check option name and value against general types.
-        if bool(re.fullmatch(ConfigTypeInferer.regex_number, value)):
+        if results:
+            return results[0][0]
+
+        if self.is_number(value):
             return ConfigType.NUMBER
 
-        if bool(re.fullmatch(ConfigTypeInferer.regex_size_value, value)):
-            return ConfigType.SIZE
-
-        return ConfigType.UNKNOWN
+        return ConfigType.UNKNOWN
\ No newline at end of file
diff --git a/src/cfgnet/linker/equality_linker.py b/src/cfgnet/linker/equality_linker.py
index ae7e27ac..35f26fc5 100644
--- a/src/cfgnet/linker/equality_linker.py
+++ b/src/cfgnet/linker/equality_linker.py
@@ -50,7 +50,7 @@ def _find_target_nodes(self):
         return [
             node
             for node in self.network.get_nodes(ValueNode)
-            if not ConfigTypeInferer.is_boolean(node.name)
+            if not self.inferer.is_boolean(node.name)
         ]
 
     def _find_matches(self, node: ValueNode) -> List[ValueNode]:
diff --git a/src/cfgnet/linker/linker.py b/src/cfgnet/linker/linker.py
index 072ab776..1c399966 100644
--- a/src/cfgnet/linker/linker.py
+++ b/src/cfgnet/linker/linker.py
@@ -20,6 +20,7 @@
 from cfgnet.linker.link import Link
 from cfgnet.linker.static_blacklist import StaticBlackList
 from cfgnet.network.nodes import ValueNode
+from cfgnet.config_types.config_type_inferer import ConfigTypeInferer
 
 if TYPE_CHECKING:
     from cfgnet.network.network import Network
@@ -35,6 +36,7 @@ def __init__(self):
         self.enable_internal_links: Optional[bool] = None
         self.target_nodes: List = None
         self.static_blacklist = StaticBlackList()
+        self.inferer = ConfigTypeInferer()
 
     @abc.abstractmethod
     def create_links(self) -> None:
diff --git a/src/cfgnet/plugins/concept/docker_plugin.py b/src/cfgnet/plugins/concept/docker_plugin.py
index 475ae600..cf161284 100644
--- a/src/cfgnet/plugins/concept/docker_plugin.py
+++ b/src/cfgnet/plugins/concept/docker_plugin.py
@@ -19,7 +19,6 @@
 from typing import List, Optional
 import dockerfile
 from cfgnet.config_types.config_types import ConfigType
-from cfgnet.config_types.config_type_inferer import ConfigTypeInferer
 
 from cfgnet.network.nodes import (
     ArtifactNode,
@@ -203,7 +202,7 @@ def _add_params(self, option: OptionNode, parameters: List[str]) -> None:
         if len(parameters) == 1:
             parameters = parameters[0].split(" ")
         for param in parameters:
-            config_type = ConfigTypeInferer.get_config_type("", param)
+            config_type = self.inferer.get_config_type("", param)
             option_param = OptionNode(
                 name="param" + str(param_counter),
                 location=option.location,
diff --git a/src/cfgnet/plugins/file_type/hadoop_plugin.py b/src/cfgnet/plugins/file_type/hadoop_plugin.py
index f9fae1d7..e7559a19 100644
--- a/src/cfgnet/plugins/file_type/hadoop_plugin.py
+++ b/src/cfgnet/plugins/file_type/hadoop_plugin.py
@@ -19,7 +19,6 @@
 from lxml import etree as ET
 from lxml.etree import _Element
 
-from cfgnet.config_types.config_type_inferer import ConfigTypeInferer
 from cfgnet.config_types.config_types import ConfigType
 from cfgnet.network.nodes import (
     ArtifactNode,
@@ -65,7 +64,7 @@ def _parse_config_file(
             option_root = OptionNode(
                 tree_root.tag,
                 tree_root.sourceline,
-                ConfigTypeInferer.get_config_type(tree_root.tag, ""),
+                self.inferer.get_config_type(tree_root.tag, ""),
             )
             artifact.add_child(option_root)
             for child in tree_root:
@@ -87,7 +86,7 @@ def parse_tree(self, subtree: _Element, parent_node: Node):
 
         if name:
             if name == "property":
-                config_type = ConfigTypeInferer.get_config_type(name, "")
+                config_type = self.inferer.get_config_type(name, "")
                 property_option = OptionNode(
                     name, subtree.sourceline, config_type
                 )
@@ -115,7 +114,7 @@ def parse_tree(self, subtree: _Element, parent_node: Node):
 
                 # Add the value node, under the property name
                 if property_value:
-                    config_type = ConfigTypeInferer.get_config_type(
+                    config_type = self.inferer.get_config_type(
                         property_name, property_value
                     )
                     option_value = OptionNode(
@@ -137,7 +136,7 @@ def parse_tree(self, subtree: _Element, parent_node: Node):
                         option_desc.add_child(description_node)
 
             else:
-                config_type = ConfigTypeInferer.get_config_type(name, "")
+                config_type = self.inferer.get_config_type(name, "")
                 option = OptionNode(name, subtree.sourceline, config_type)
                 parent_node.add_child(option)
 
diff --git a/src/cfgnet/plugins/plugin.py b/src/cfgnet/plugins/plugin.py
index fefd124a..65a8597d 100644
--- a/src/cfgnet/plugins/plugin.py
+++ b/src/cfgnet/plugins/plugin.py
@@ -18,6 +18,7 @@
 import os
 
 from typing import Optional
+from cfgnet.config_types.config_type_inferer import ConfigTypeInferer
 from cfgnet.network.nodes import ProjectNode, ArtifactNode
 
 
@@ -33,6 +34,7 @@ def __init__(self, concept_name: str, threshold: Optional[int] = None):
         """
         self.concept_name: str = concept_name
         self.file_size_threshold: Optional[int] = threshold
+        self.inferer = ConfigTypeInferer()
 
     @abc.abstractmethod
     def _parse_config_file(
diff --git a/tests/cfgnet/config_types/test_config_type_inferer.py b/tests/cfgnet/config_types/test_config_type_inferer.py
index c143de75..7ec7c6e8 100644
--- a/tests/cfgnet/config_types/test_config_type_inferer.py
+++ b/tests/cfgnet/config_types/test_config_type_inferer.py
@@ -12,56 +12,137 @@
 #
 # You should have received a copy of the GNU General Public License along with
 # this program. If not, see .
-
-
+import pytest
 from cfgnet.config_types.config_type_inferer import ConfigTypeInferer
 from cfgnet.config_types.config_types import ConfigType
 
+test_dataset = [
+    # Ports
+    {"option_name": "db_port", "value": "5432", "expected_type": ConfigType.PORT},
+    {"option_name": "server_port", "value": "8080", "expected_type": ConfigType.PORT},
+    {"option_name": "listener_port", "value": "80", "expected_type": ConfigType.PORT},
+
+    # Usernames
+    {"option_name": "admin_user", "value": "admin", "expected_type": ConfigType.USERNAME},
+    {"option_name": "ftp_username", "value": "ftp_user123", "expected_type": ConfigType.USERNAME},
+
+    # Passwords
+    {"option_name": "db_password", "value": "secret_password123", "expected_type": ConfigType.PASSWORD},
+    {"option_name": "ftp_pass", "value": "MyP@ssw0rd!", "expected_type": ConfigType.PASSWORD},
+
+    # URLs
+    {"option_name": "api_url", "value": "https://api.example.com", "expected_type": ConfigType.URL},
+    {"option_name": "homepage", "value": "http://example.org", "expected_type": ConfigType.URL},
+
+    # IP Addresses
+    {"option_name": "server_ip", "value": "192.168.1.1", "expected_type": ConfigType.IP_ADDRESS},
+    {"option_name": "host_ip", "value": "10.0.0.254", "expected_type": ConfigType.IP_ADDRESS},
+
+    # Sizes
+    {"option_name": "max_file_size", "value": "20MB", "expected_type": ConfigType.SIZE},
+    {"option_name": "cache_size", "value": "512KB", "expected_type": ConfigType.SIZE},
+    {"option_name": "disk_size", "value": "100GB", "expected_type": ConfigType.SIZE},
+
+    # Timeouts
+    {"option_name": "session_timeout", "value": "30s", "expected_type": ConfigType.TIME},
+    {"option_name": "request_timeout", "value": "500ms", "expected_type": ConfigType.TIME},
+    {"option_name": "retry_interval", "value": "5min", "expected_type": ConfigType.TIME},
+
+    # Version numbers
+    {"option_name": "software_version", "value": "1.2.3", "expected_type": ConfigType.VERSION_NUMBER},
+    {"option_name": "api_version", "value": "v2.3.4-beta", "expected_type": ConfigType.VERSION_NUMBER},
+
+    # Paths
+    {"option_name": "log_dir", "value": "/var/logs/app/", "expected_type": ConfigType.PATH},
+    {"option_name": "config_path", "value": "/etc/app/config.yaml", "expected_type": ConfigType.PATH},
+    {"option_name": "home_dir", "value": "~/home/user/", "expected_type": ConfigType.PATH},
+
+    # File Names
+    {"option_name": "logfile", "value": "output.log", "expected_type": ConfigType.PATH},
+    {"option_name": "config_file", "value": "settings.ini", "expected_type": ConfigType.PATH},
+
+    # Emails
+    {"option_name": "admin_email", "value": "admin@example.com", "expected_type": ConfigType.EMAIL},
+    {"option_name": "support_email", "value": "support@myapp.org", "expected_type": ConfigType.EMAIL},
+
+    # IDs
+    {"option_name": "session_id", "value": "abc123xyz", "expected_type": ConfigType.ID},
+    {"option_name": "user_token", "value": "token-987654321", "expected_type": ConfigType.ID},
+    {"option_name": "auth_key", "value": "auth_56789", "expected_type": ConfigType.ID},
+
+    # Booleans
+    {"option_name": "enable_feature", "value": "true", "expected_type": ConfigType.BOOLEAN},
+    {"option_name": "is_active", "value": "false", "expected_type": ConfigType.BOOLEAN},
+    {"option_name": "debug_mode", "value": "1", "expected_type": ConfigType.BOOLEAN},
+
+    # Counts
+    {"option_name": "max_connections", "value": "100", "expected_type": ConfigType.COUNT},
+    {"option_name": "retry_count", "value": "5", "expected_type": ConfigType.COUNT},
+
+    # Speeds
+    {"option_name": "download_speed", "value": "100Mbps", "expected_type": ConfigType.SPEED},
+    {"option_name": "upload_speed", "value": "50Mbps", "expected_type": ConfigType.SPEED},
+
+    # Commands
+    {"option_name": "start_command", "value": "run.sh", "expected_type": ConfigType.COMMAND},
+    {"option_name": "install_script", "value": "install.sh", "expected_type": ConfigType.COMMAND},
+
+    # Licenses
+    {"option_name": "software_license", "value": "MIT", "expected_type": ConfigType.LICENSE},
+    {"option_name": "license_type", "value": "GPL", "expected_type": ConfigType.LICENSE},
+
+    # Images
+    {"option_name": "logo_image", "value": "logo.png", "expected_type": ConfigType.IMAGE},
+
+    # Modes (non-boolean values; "on"/"off" would be inferred as BOOLEAN)
+    {"option_name": "dark_mode", "value": "auto", "expected_type": ConfigType.MODE},
+    {"option_name": "safe_mode", "value": "strict", "expected_type": ConfigType.MODE},
+
+    # Options without a dedicated check (name-based match or UNKNOWN)
+    {"option_name": "system_state", "value": "active", "expected_type": ConfigType.UNKNOWN},
+    {"option_name": "language", "value": "en", "expected_type": ConfigType.UNKNOWN},
+    {"option_name": "hostname", "value": "localhost", "expected_type": ConfigType.NAME},
+    {"option_name": "class_name", "value": "com.example.MyClass", "expected_type": ConfigType.NAME},
+]
 
 
-def test_get_config_type():
-    assert ConfigTypeInferer.get_config_type("port", "8080") == ConfigType.PORT
-    assert ConfigTypeInferer.get_config_type("max_length", "100") == ConfigType.SIZE
-    assert ConfigTypeInferer.get_config_type("usr", "test") == ConfigType.USERNAME
-    assert ConfigTypeInferer.get_config_type("timeout", "10") == ConfigType.TIME
-    assert ConfigTypeInferer.get_config_type("file_path", "target/main.jar") == ConfigType.PATH
-    assert ConfigTypeInferer.get_config_type("url", "https://test.com/") == ConfigType.URL
-    assert ConfigTypeInferer.get_config_type("ip", "192.168.34.164") == ConfigType.IP_ADDRESS
-    assert ConfigTypeInferer.get_config_type("email", "test@gmail.com") == ConfigType.EMAIL
-    assert ConfigTypeInferer.get_config_type("speed", "1 bps") == ConfigType.SPEED
-    assert ConfigTypeInferer.get_config_type("memory", "516 GB") == ConfigType.SIZE
-    assert ConfigTypeInferer.get_config_type("artifact_id", "artifact_name") == ConfigType.ID
-    assert ConfigTypeInferer.get_config_type("password", "test1234") == ConfigType.PASSWORD
-    assert ConfigTypeInferer.get_config_type("count_leafs", "5") == ConfigType.COUNT
-    assert ConfigTypeInferer.get_config_type("domain_name", "https://192.168.34.164:8080") == ConfigType.DOMAIN_NAME
-    assert ConfigTypeInferer.get_config_type("server_name", "MainServer15") == ConfigType.NAME
-    assert ConfigTypeInferer.get_config_type("num_cores", "123123123") == ConfigType.NUMBER
-    assert ConfigTypeInferer.get_config_type("version_number", "1.12.12") == ConfigType.VERSION_NUMBER
-    assert ConfigTypeInferer.get_config_type("test", "true") == ConfigType.BOOLEAN
-    assert ConfigTypeInferer.get_config_type("io.file.buffer.size", "131072") == ConfigType.SIZE
+@pytest.fixture(name="get_inferer")
+def get_inferer_():
+    inferer = ConfigTypeInferer()
+    return inferer
 
 
-def test_file_paths():
-    abs_file_path = "/home/user/github/cfgnet/src/launcher.py"
-    rel_file_path = "../cfgnet/src/network/network.py"
-    no_file_path = "test_string"
 
-    assert ConfigTypeInferer.get_config_type("", abs_file_path) == ConfigType.PATH
-    assert ConfigTypeInferer.get_config_type("", rel_file_path) == ConfigType.PATH
-    assert ConfigTypeInferer.get_config_type("", no_file_path) == ConfigType.UNKNOWN
 
 
-def test_port():
-    port = "8080"
-    not_port = "-200"
+def test_get_config_type(get_inferer):
+    inferer = get_inferer
 
-    assert ConfigTypeInferer.get_config_type("port", port) == ConfigType.PORT
-    assert ConfigTypeInferer.get_config_type("", not_port) == ConfigType.UNKNOWN
+    assert inferer.get_config_type("port", "8080") == ConfigType.PORT
+    assert inferer.get_config_type("max_length", "100") == ConfigType.SIZE
+    assert inferer.get_config_type("usr", "test") == ConfigType.USERNAME
+    assert inferer.get_config_type("timeout", "10") == ConfigType.TIME
+    assert inferer.get_config_type("file_path", "target/main.jar") == ConfigType.PATH
+    assert inferer.get_config_type("url", "https://test.com/") == ConfigType.URL
+    assert inferer.get_config_type("ip", "192.168.34.164") == ConfigType.IP_ADDRESS
+    assert inferer.get_config_type("email", "test@gmail.com") == ConfigType.EMAIL
+    assert inferer.get_config_type("speed", "1 bps") == ConfigType.SPEED
+    assert inferer.get_config_type("memory", "516 GB") == ConfigType.SIZE
+    assert inferer.get_config_type("artifact_id", "artifact_name") == ConfigType.ID
+    assert inferer.get_config_type("password", "test1234") == ConfigType.PASSWORD
+    assert inferer.get_config_type("count_leafs", "5") == ConfigType.COUNT
+    assert inferer.get_config_type("domain_name", "https://192.168.34.164:8080") == ConfigType.URL
+    assert inferer.get_config_type("server_name", "MainServer15") == ConfigType.NAME
+    assert inferer.get_config_type("num_cores", "123123123") == ConfigType.NUMBER
+    assert inferer.get_config_type("version_number", "1.12.12") == ConfigType.VERSION_NUMBER
+    assert inferer.get_config_type("test", "true") == ConfigType.BOOLEAN
+    assert inferer.get_config_type("io.file.buffer.size", "131072") == ConfigType.SIZE
 
 
-def test_version_number():
-    version = "1.1.1"
-    version_snapshot = "3.9.0-SNAPSHOT"
+def test_config_types(get_inferer):
+    inferer = get_inferer
 
-    assert ConfigTypeInferer.get_config_type("version", version) == ConfigType.VERSION_NUMBER
-    assert ConfigTypeInferer.get_config_type("version_snapshot", version_snapshot) == ConfigType.VERSION_NUMBER
+    for test in test_dataset:
+        inferred_type = inferer.get_config_type(test["option_name"], test["value"])
+        print(test["option_name"], test["value"], test["expected_type"], inferred_type)
+        assert inferred_type == test["expected_type"]
\ No newline at end of file