diff --git a/pypcode/__init__.py b/pypcode/__init__.py
index c195c96..46559cf 100644
--- a/pypcode/__init__.py
+++ b/pypcode/__init__.py
@@ -31,7 +31,9 @@
 )
 
 from .pypcode_native import Context as _Context  # pylint:disable=no-name-in-module
-
+from .cspec import CompilerSpec
+from .pspec import ProcessorSpec
+from .ldefs import LanguageDefinitions, Language
 
 __all__ = [
     "Address",
@@ -39,6 +41,7 @@
     "Arch",
     "ArchLanguage",
     "BadDataError",
+    "CompilerSpec",
     "Context",
     "DecoderError",
     "Disassembly",
@@ -52,6 +55,7 @@
     "OpFormatUnary",
     "PcodeOp",
     "PcodePrettyPrinter",
+    "ProcessorSpec",
     "TranslateFlags",
     "Translation",
     "UnimplError",
@@ -84,62 +88,60 @@ class ArchLanguage:
     )
 
     archdir: str
-    ldef: ET.Element
+    ldef: Language
 
-    def __init__(self, archdir: str, ldef: ET.Element):
+    def __init__(self, archdir: str, ldef: Language):
         self.archdir = archdir
         self.ldef = ldef
-        self._pspec: Optional[ET.Element] = None
-        self._cspecs: Optional[Dict[Tuple[str, str], ET.Element]] = None
+        self._pspec: Optional[ProcessorSpec] = None
+        self._cspecs: Optional[Dict[Tuple[str, str], CompilerSpec]] = None
 
     @property
     def pspec_path(self) -> str:
-        return os.path.join(self.archdir, self.processorspec)
+        return os.path.join(self.archdir, self.ldef.processorspec)
 
     @property
     def slafile_path(self) -> str:
-        return os.path.join(self.archdir, self.slafile)
+        return os.path.join(self.archdir, self.ldef.slafile)
 
     @property
     def description(self) -> str:
-        elem = self.ldef.find("description")
-        if elem is not None:
-            return elem.text or ""
-        return ""
+        return self.ldef.description or ""
 
     def __getattr__(self, key):
-        if key in self.ldef.attrib:
-            return self.ldef.attrib[key]
-        raise AttributeError(key)
+        return getattr(self.ldef, key)
 
     @property
-    def pspec(self) -> Optional[ET.Element]:
+    def pspec(self) -> Optional[ProcessorSpec]:
         if self._pspec is None:
-            self._pspec = ET.parse(self.pspec_path).getroot()
+            root = ET.parse(self.pspec_path).getroot()
+            self._pspec = ProcessorSpec.from_element(root)
         return self._pspec
 
     @property
-    def cspecs(self) -> Mapping[Tuple[str, str], ET.Element]:
+    def cspecs(self) -> Mapping[Tuple[str, str], CompilerSpec]:
         if self._cspecs is None:
             self._cspecs = {}
-            for e in self.ldef.findall("compiler"):
-                path = os.path.join(self.archdir, e.attrib["spec"])
-                cspec = ET.parse(path).getroot()
-                self._cspecs[(e.attrib["id"], e.attrib["name"])] = cspec
+            for e in self.ldef.compilers:
+                path = os.path.join(self.archdir, e.spec)
+                root = ET.parse(path).getroot()
+                cspec = CompilerSpec.from_element(root)
+                self._cspecs[(e.id, e.name)] = cspec
         return self._cspecs
 
     def init_context_from_pspec(self, ctx: "Context") -> None:
         if self.pspec is None:
             return
-        cd = self.pspec.find("context_data")
-        if cd is None:
+
+        if self.pspec.context_data is None:
             return
-        cs = cd.find("context_set")
-        if cs is None:
+
+        context_set = self.pspec.context_data.context_set
+        if context_set is None:
             return
-        for e in cs:
-            assert e.tag == "set"
-            ctx.setVariableDefault(e.attrib["name"], int(e.attrib["val"]))
+
+        for name, value in context_set.values.items():
+            ctx.setVariableDefault(name, value)
 
     @classmethod
     def from_id(cls, langid: str) -> Optional["ArchLanguage"]:
@@ -169,7 +171,7 @@ class Arch:
     archpath: str
     archname: str
     ldefpath: str
-    ldef: ET.ElementTree
+    ldef: LanguageDefinitions
     languages: Sequence[ArchLanguage]
 
     def __init__(self, name: str, ldefpath: str):
@@ -182,8 +184,13 @@ def __init__(self, name: str, ldefpath: str):
         self.archpath = os.path.dirname(ldefpath)
         self.archname = name
         self.ldefpath = ldefpath
-        self.ldef = ET.parse(ldefpath)
-        self.languages = [ArchLanguage(self.archpath, e) for e in self.ldef.getroot()]
+
+        # Parse ldefs file into structured format
+        with open(ldefpath, encoding="utf8") as f:
+            self.ldef = LanguageDefinitions.from_xml(f.read())
+
+        # Create ArchLanguage objects from structured data
+        self.languages = [ArchLanguage(self.archpath, lang) for lang in self.ldef.languages]
 
     @classmethod
     def enumerate(cls) -> Generator["Arch", None, None]:
diff --git a/pypcode/cspec.py b/pypcode/cspec.py
new file mode 100644
index 0000000..5100284
--- /dev/null
+++ b/pypcode/cspec.py
@@ -0,0 +1,134 @@
+# pylint: disable=missing-class-docstring
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from xml.etree import ElementTree as ET
+
+
+@dataclass
+class DataOrganization:
+    absolute_max_alignment: int | None = None
+    machine_alignment: int | None = None
+    default_alignment: int | None = None
+    default_pointer_alignment: int | None = None
+    wchar_size: int | None = None
+    short_size: int | None = None
+    integer_size: int | None = None
+    long_size: int | None = None
+    long_long_size: int | None = None
+    float_size: int | None = None
+    double_size: int | None = None
+    long_double_size: int | None = None
+    size_alignment_map: dict[int, int] = field(default_factory=dict)
+    bitfield_packing_uses_ms: bool = False
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> DataOrganization:
+        if element is None:
+            return cls()
+
+        def get_int_attr(elem: ET.Element, attr: str) -> int | None:
+            # Handle both attribute and element value cases
+            if attr in elem.attrib:
+                return int(elem.attrib[attr])
+            # Look for a child element with this name
+            child = elem.find(attr)
+            if child is not None and "value" in child.attrib:
+                return int(child.attrib["value"])
+            return None
+
+        alignment_map = {}
+        map_elem = element.find("size_alignment_map")
+        if map_elem is not None:
+            for entry in map_elem.findall("entry"):
+                size = int(entry.attrib["size"])
+                alignment = int(entry.attrib["alignment"])
+                alignment_map[size] = alignment
+
+        packing_elem = element.find("bitfield_packing")
+        uses_ms = False
+        if packing_elem is not None:
+            ms_conv = packing_elem.find("use_MS_convention")
+            uses_ms = ms_conv is not None and ms_conv.attrib.get("value", "false").lower() == "true"
+
+        return cls(
+            absolute_max_alignment=get_int_attr(element, "absolute_max_alignment"),
+            machine_alignment=get_int_attr(element, "machine_alignment"),
+            default_alignment=get_int_attr(element, "default_alignment"),
+            default_pointer_alignment=get_int_attr(element, "default_pointer_alignment"),
+            wchar_size=get_int_attr(element, "wchar_size"),
+            short_size=get_int_attr(element, "short_size"),
+            integer_size=get_int_attr(element, "integer_size"),
+            long_size=get_int_attr(element, "long_size"),
+            long_long_size=get_int_attr(element, "long_long_size"),
+            float_size=get_int_attr(element, "float_size"),
+            double_size=get_int_attr(element, "double_size"),
+            long_double_size=get_int_attr(element, "long_double_size"),
+            size_alignment_map=alignment_map,
+            bitfield_packing_uses_ms=uses_ms,
+        )
+
+
+@dataclass
+class GlobalScope:
+    ram_present: bool = False
+    registers: list[str] = field(default_factory=list)
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> GlobalScope:
+        if element is None:
+            return cls()
+
+        ram_present = any(r.attrib.get("space", "") == "ram" for r in element.findall("range"))
+        registers = [r.attrib["name"] for r in element.findall("register")]
+
+        return cls(ram_present=ram_present, registers=registers)
+
+
+@dataclass
+class CompilerSpec:
+    data_organization: DataOrganization = field(default_factory=DataOrganization)
+    global_scope: GlobalScope = field(default_factory=GlobalScope)
+    stackpointer_register: str | None = None
+    returnaddress_register: str | None = None
+    returnaddress_space: str | None = None
+    returnaddress_offset: int | None = None
+    returnaddress_size: int | None = None
+
+    @classmethod
+    def from_xml(cls, xml_string: str) -> CompilerSpec:
+        root = ET.fromstring(xml_string)
+        return cls.from_element(root)
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> CompilerSpec:
+        data_org_elem = element.find("data_organization")
+        data_org = DataOrganization.from_element(data_org_elem) if data_org_elem is not None else DataOrganization()
+        global_elem = element.find("global")
+        global_scope = GlobalScope.from_element(global_elem) if global_elem is not None else GlobalScope()
+
+        sp_elem = element.find("stackpointer")
+        stackpointer = sp_elem.attrib["register"] if sp_elem is not None else None
+
+        ret_elem = element.find("returnaddress")
+        retaddr_reg = retaddr_space = None
+        retaddr_offset = retaddr_size = None
+        if ret_elem is not None:
+            if "register" in ret_elem.attrib:
+                retaddr_reg = ret_elem.attrib["register"]
+            else:
+                var_elem = ret_elem.find("varnode")
+                if var_elem is not None:
+                    retaddr_space = var_elem.attrib["space"]
+                    retaddr_offset = int(var_elem.attrib["offset"])
+                    retaddr_size = int(var_elem.attrib["size"])
+
+        return cls(
+            data_organization=data_org,
+            global_scope=global_scope,
+            stackpointer_register=stackpointer,
+            returnaddress_register=retaddr_reg,
+            returnaddress_space=retaddr_space,
+            returnaddress_offset=retaddr_offset,
+            returnaddress_size=retaddr_size,
+        )
diff --git a/pypcode/ldefs.py b/pypcode/ldefs.py
new file mode 100644
index 0000000..24dd99c
--- /dev/null
+++ b/pypcode/ldefs.py
@@ -0,0 +1,78 @@
+# pylint: disable=missing-class-docstring
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from xml.etree import ElementTree as ET
+
+
+@dataclass
+class ExternalName:
+    tool: str
+    name: str
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> ExternalName:
+        return cls(tool=element.attrib["tool"], name=element.attrib["name"])
+
+
+@dataclass
+class Compiler:
+    name: str
+    spec: str
+    id: str
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> Compiler:
+        return cls(name=element.attrib["name"], spec=element.attrib["spec"], id=element.attrib["id"])
+
+
+@dataclass
+class Language:
+    processor: str
+    endian: str
+    size: int
+    variant: str
+    version: str
+    slafile: str
+    processorspec: str
+    id: str
+    description: str | None = None
+    manualindexfile: str | None = None
+    instructionEndian: str | None = None
+    compilers: list[Compiler] = field(default_factory=list)
+    external_names: list[ExternalName] = field(default_factory=list)
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> Language:
+        desc_elem = element.find("description")
+        description = desc_elem.text if desc_elem is not None else ""
+
+        return cls(
+            processor=element.attrib["processor"],
+            endian=element.attrib["endian"],
+            size=int(element.attrib["size"]),
+            variant=element.attrib["variant"],
+            version=element.attrib["version"],
+            slafile=element.attrib["slafile"],
+            processorspec=element.attrib["processorspec"],
+            id=element.attrib["id"],
+            description=description,
+            manualindexfile=element.attrib.get("manualindexfile"),
+            instructionEndian=element.attrib.get("instructionEndian"),
+            compilers=[Compiler.from_element(e) for e in element.findall("compiler")],
+            external_names=[ExternalName.from_element(e) for e in element.findall("external_name")],
+        )
+
+
+@dataclass
+class LanguageDefinitions:
+    languages: list[Language] = field(default_factory=list)
+
+    @classmethod
+    def from_xml(cls, xml_string: str) -> LanguageDefinitions:
+        root = ET.fromstring(xml_string)
+        return cls.from_element(root)
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> LanguageDefinitions:
+        return cls(languages=[Language.from_element(e) for e in element.findall("language")])
diff --git a/pypcode/pspec.py b/pypcode/pspec.py
new file mode 100644
index 0000000..79b0e40
--- /dev/null
+++ b/pypcode/pspec.py
@@ -0,0 +1,193 @@
+# pylint: disable=missing-class-docstring
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from xml.etree import ElementTree as ET
+
+
+@dataclass
+class Property:
+    key: str
+    value: str
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> Property:
+        return cls(key=element.attrib["key"], value=element.attrib["value"])
+
+
+@dataclass
+class Properties:
+    properties: dict[str, str] = field(default_factory=dict)
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> Properties:
+        props = cls()
+        if element is not None:
+            for prop in element.findall("property"):
+                p = Property.from_element(prop)
+                props.properties[p.key] = p.value
+        return props
+
+
+@dataclass
+class ContextSet:
+    space: str
+    values: dict[str, int]
+    description: dict[str, str]
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> ContextSet:
+        values = {}
+        descriptions = {}
+        for set_elem in element.findall("set"):
+            name = set_elem.attrib["name"]
+            values[name] = int(set_elem.attrib["val"])
+            if "description" in set_elem.attrib:
+                descriptions[name] = set_elem.attrib["description"]
+        return cls(space=element.attrib["space"], values=values, description=descriptions)
+
+
+@dataclass
+class TrackedSet:
+    space: str
+    values: dict[str, int]
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> TrackedSet:
+        values = {}
+        for set_elem in element.findall("set"):
+            val_str = set_elem.attrib["val"]
+            if val_str.startswith("0x"):
+                values[set_elem.attrib["name"]] = int(val_str, 16)
+            else:
+                values[set_elem.attrib["name"]] = int(val_str)
+        return cls(space=element.attrib["space"], values=values)
+
+
+@dataclass
+class ContextData:
+    context_set: ContextSet | None = None
+    tracked_set: TrackedSet | None = None
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> ContextData:
+        context_set_elem = element.find("context_set")
+        tracked_set_elem = element.find("tracked_set")
+
+        return cls(
+            context_set=ContextSet.from_element(context_set_elem) if context_set_elem is not None else None,
+            tracked_set=TrackedSet.from_element(tracked_set_elem) if tracked_set_elem is not None else None,
+        )
+
+
+@dataclass
+class Register:
+    name: str
+    group: str | None = None
+    vector_lane_sizes: set[int] | None = None
+    hidden: bool = False
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> Register:
+        vector_sizes = None
+        if "vector_lane_sizes" in element.attrib:
+            vector_sizes = {int(x) for x in element.attrib["vector_lane_sizes"].split(",")}
+
+        return cls(
+            name=element.attrib["name"],
+            group=element.attrib.get("group"),
+            vector_lane_sizes=vector_sizes,
+            hidden=element.attrib.get("hidden", "false").lower() == "true",
+        )
+
+
+@dataclass
+class RegisterData:
+    registers: list[Register] = field(default_factory=list)
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> RegisterData:
+        registers = []
+        if element is not None:
+            for reg in element.findall("register"):
+                registers.append(Register.from_element(reg))
+        return cls(registers=registers)
+
+
+@dataclass
+class DefaultSymbol:
+    name: str
+    address: str
+    entry: bool
+    type: str | None = None
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> DefaultSymbol:
+        return cls(
+            name=element.attrib["name"],
+            address=element.attrib["address"],
+            entry=element.attrib.get("entry", "false").lower() == "true",
+            type=element.attrib.get("type"),
+        )
+
+
+@dataclass
+class DefaultSymbols:
+    symbols: list[DefaultSymbol] = field(default_factory=list)
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> DefaultSymbols:
+        symbols = []
+        if element is not None:
+            for sym in element.findall("symbol"):
+                symbols.append(DefaultSymbol.from_element(sym))
+        return cls(symbols=symbols)
+
+
+@dataclass
+class ProcessorSpec:
+    properties: Properties | None
+    programcounter: str | None
+    context_data: ContextData | None
+    register_data: RegisterData | None
+    default_symbols: DefaultSymbols | None
+    incidentalcopy: list[str] | None = None
+
+    @classmethod
+    def from_xml(cls, xml_string: str) -> ProcessorSpec:
+        root = ET.fromstring(xml_string)
+        return cls.from_element(root)
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> ProcessorSpec:
+        props_elem = element.find("properties")
+        props = Properties.from_element(props_elem) if props_elem is not None else None
+
+        pc_elem = element.find("programcounter")
+        pc_reg = pc_elem.attrib["register"] if pc_elem is not None else None
+
+        context_data_elem = element.find("context_data")
+        context_data = ContextData.from_element(context_data_elem) if context_data_elem is not None else None
+
+        register_data_elem = element.find("register_data")
+        register_data = RegisterData.from_element(register_data_elem) if register_data_elem is not None else None
+
+        default_symbols_elem = element.find("default_symbols")
+        default_symbols = (
+            DefaultSymbols.from_element(default_symbols_elem) if default_symbols_elem is not None else None
+        )
+
+        incidentalcopy = []
+        incidentalcopy_elem = element.find("incidentalcopy")
+        if incidentalcopy_elem is not None:
+            for reg in incidentalcopy_elem.findall("register"):
+                incidentalcopy.append(reg.attrib["name"])
+
+        return cls(
+            properties=props,
+            programcounter=pc_reg,
+            context_data=context_data,
+            register_data=register_data,
+            default_symbols=default_symbols,
+            incidentalcopy=incidentalcopy if incidentalcopy else None,
+        )
diff --git a/pyproject.toml b/pyproject.toml
index 323d806..2ee6e60 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,3 +11,6 @@ line-length = 120
 extend-ignore = [
     "E402",  # Bottom imports
 ]
+exclude = [
+    "pypcode/processors"
+]
diff --git a/tests/test_cspec.py b/tests/test_cspec.py
new file mode 100644
index 0000000..ba4e850
--- /dev/null
+++ b/tests/test_cspec.py
@@ -0,0 +1,167 @@
+import unittest
+from pathlib import Path
+
+from pypcode.cspec import CompilerSpec
+
+# Get all .cspec files from processors directory
+PROCESSORS_DIR = Path(__file__).parent.parent / "pypcode" / "processors"
+
+
+class TestCompilerSpec(unittest.TestCase):
+    """Test cases for compiler spec parsing"""
+
+    def setUp(self):
+        self.all_cspecs = list(PROCESSORS_DIR.glob("**/data/languages/*.cspec"))
+        self.assertTrue(len(self.all_cspecs) > 0, "No cspec files found")
+
+    def test_all_cspecs(self):
+        """Test that we can parse all compiler specs without errors."""
+        for cspec_path in self.all_cspecs:
+            with self.subTest(cspec_path=cspec_path):
+                with open(cspec_path, encoding="utf8") as f:
+                    cspec = CompilerSpec.from_xml(f.read())
+
+                # Basic validation of parsed data
+                self.assertIsNotNone(cspec.data_organization)
+                self.assertIsNotNone(cspec.global_scope)
+
+    def test_x86_64_win_cspec(self):
+        """Test specific x86-64 Windows compiler spec."""
+        cspec_path = PROCESSORS_DIR / "x86" / "data" / "languages" / "x86-64-win.cspec"
+        with open(cspec_path, encoding="utf8") as f:
+            cspec = CompilerSpec.from_xml(f.read())
+
+        # Test data organization
+        data_org = cspec.data_organization
+        self.assertEqual(data_org.wchar_size, 2)
+        self.assertEqual(data_org.long_size, 4)
+        self.assertEqual(data_org.long_long_size, 8)
+
+        # Test size alignment map
+        self.assertEqual(data_org.size_alignment_map[1], 1)
+        self.assertEqual(data_org.size_alignment_map[2], 2)
+        self.assertEqual(data_org.size_alignment_map[4], 4)
+        self.assertEqual(data_org.size_alignment_map[8], 8)
+
+        # Test bitfield packing
+        self.assertTrue(data_org.bitfield_packing_uses_ms)
+
+        # Test global scope
+        self.assertTrue(cspec.global_scope.ram_present)
+        self.assertIn("MXCSR", cspec.global_scope.registers)
+
+        # Test stack and return info
+        self.assertEqual(cspec.stackpointer_register, "RSP")
+        self.assertEqual(cspec.returnaddress_space, "stack")
+        self.assertEqual(cspec.returnaddress_offset, 0)
+        self.assertEqual(cspec.returnaddress_size, 8)
+
+    def test_x86_64_gcc_cspec(self):
+        """Test specific x86-64 GCC compiler spec."""
+        cspec_path = PROCESSORS_DIR / "x86" / "data" / "languages" / "x86-64-gcc.cspec"
+        with open(cspec_path, encoding="utf8") as f:
+            cspec = CompilerSpec.from_xml(f.read())
+
+        # Test data organization
+        data_org = cspec.data_organization
+        self.assertEqual(data_org.wchar_size, 4)
+        self.assertEqual(data_org.long_size, 8)
+        self.assertEqual(data_org.long_double_size, 10)  # Note: aligned length is 16
+
+        # Test size alignment map
+        self.assertEqual(data_org.size_alignment_map[1], 1)
+        self.assertEqual(data_org.size_alignment_map[2], 2)
+        self.assertEqual(data_org.size_alignment_map[4], 4)
+        self.assertEqual(data_org.size_alignment_map[8], 8)
+        self.assertEqual(data_org.size_alignment_map[16], 16)
+
+        # Test global scope
+        self.assertTrue(cspec.global_scope.ram_present)
+        self.assertIn("MXCSR", cspec.global_scope.registers)
+
+        # Test stack and return info
+        self.assertEqual(cspec.stackpointer_register, "RSP")
+        self.assertEqual(cspec.returnaddress_space, "stack")
+        self.assertEqual(cspec.returnaddress_offset, 0)
+        self.assertEqual(cspec.returnaddress_size, 8)
+
+    def test_aarch64_cspec(self):
+        """Test specific AArch64 compiler spec."""
+        cspec_path = PROCESSORS_DIR / "AARCH64" / "data" / "languages" / "AARCH64.cspec"
+        with open(cspec_path, encoding="utf8") as f:
+            cspec = CompilerSpec.from_xml(f.read())
+
+        # Test data organization
+        data_org = cspec.data_organization
+        self.assertEqual(data_org.wchar_size, 4)
+        self.assertEqual(data_org.long_size, 8)
+        self.assertEqual(data_org.double_size, 8)
+
+        # Test size alignment map
+        self.assertEqual(data_org.size_alignment_map[1], 1)
+        self.assertEqual(data_org.size_alignment_map[2], 2)
+        self.assertEqual(data_org.size_alignment_map[4], 4)
+        self.assertEqual(data_org.size_alignment_map[8], 8)
+        self.assertEqual(data_org.size_alignment_map[16], 16)
+
+        # Test global scope
+        self.assertTrue(cspec.global_scope.ram_present)
+
+        # Test stack pointer
+        self.assertEqual(cspec.stackpointer_register, "sp")
+
+    def test_x86_32_gcc_cspec(self):
+        """Test specific x86 32-bit GCC compiler spec."""
+        cspec_path = PROCESSORS_DIR / "x86" / "data" / "languages" / "x86gcc.cspec"
+        with open(cspec_path, encoding="utf8") as f:
+            cspec = CompilerSpec.from_xml(f.read())
+
+        # Test data organization
+        data_org = cspec.data_organization
+        self.assertEqual(data_org.wchar_size, 4)
+        self.assertEqual(data_org.long_size, 4)
+        self.assertEqual(data_org.long_long_size, 8)
+
+        # Test size alignment map
+        self.assertEqual(data_org.size_alignment_map[1], 1)
+        self.assertEqual(data_org.size_alignment_map[2], 2)
+        self.assertEqual(data_org.size_alignment_map[4], 4)
+        self.assertEqual(data_org.size_alignment_map[8], 4)
+
+        # Test global scope
+        self.assertTrue(cspec.global_scope.ram_present)
+        self.assertIn("MXCSR", cspec.global_scope.registers)
+
+        # Test stack and return info
+        self.assertEqual(cspec.stackpointer_register, "ESP")
+        self.assertEqual(cspec.returnaddress_space, "stack")
+        self.assertEqual(cspec.returnaddress_offset, 0)
+        self.assertEqual(cspec.returnaddress_size, 4)  # 32-bit return address
+
+    def test_arm_32_cspec(self):
+        """Test specific ARM 32-bit compiler spec."""
+        cspec_path = PROCESSORS_DIR / "ARM" / "data" / "languages" / "ARM.cspec"
+        with open(cspec_path, encoding="utf8") as f:
+            cspec = CompilerSpec.from_xml(f.read())
+
+        # Test data organization
+        data_org = cspec.data_organization
+        self.assertEqual(data_org.wchar_size, 4)
+        self.assertEqual(data_org.long_size, 4)
+        self.assertEqual(data_org.double_size, 8)
+
+        # Test size alignment map
+        self.assertEqual(data_org.size_alignment_map[1], 1)
+        self.assertEqual(data_org.size_alignment_map[2], 2)
+        self.assertEqual(data_org.size_alignment_map[4], 4)
+        self.assertEqual(data_org.size_alignment_map[8], 8)
+
+        # Test global scope
+        self.assertTrue(cspec.global_scope.ram_present)
+
+        # Test stack pointer
+        self.assertEqual(cspec.stackpointer_register, "sp")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_ldefs.py b/tests/test_ldefs.py
new file mode 100644
index 0000000..43b3c8c
--- /dev/null
+++ b/tests/test_ldefs.py
@@ -0,0 +1,103 @@
+import unittest
+from pathlib import Path
+
+from pypcode.ldefs import LanguageDefinitions
+
+# Get all .ldefs files from processors directory
+PROCESSORS_DIR = Path(__file__).parent.parent / "pypcode" / "processors"
+
+
+class TestLanguageDefinitions(unittest.TestCase):
+    """Test cases for language definition parsing"""
+
+    def setUp(self):
+        self.all_ldefs = list(PROCESSORS_DIR.glob("**/data/languages/*.ldefs"))
+        self.assertTrue(len(self.all_ldefs) > 0, "No ldefs files found")
+
+    def test_all_ldefs(self):
+        """Test that we can parse all language definition files without errors."""
+        for ldefs_path in self.all_ldefs:
+            with self.subTest(ldefs_path=ldefs_path):
+                with open(ldefs_path, encoding="utf8") as f:
+                    ldefs = LanguageDefinitions.from_xml(f.read())
+
+                # Basic validation of parsed data
+                self.assertIsNotNone(ldefs.languages)
+                for lang in ldefs.languages:
+                    self.assertIsInstance(lang.processor, str)
+                    self.assertIsInstance(lang.endian, str)
+                    self.assertIsInstance(lang.size, int)
+                    self.assertIsInstance(lang.variant, str)
+                    self.assertIsInstance(lang.version, str)
+                    self.assertIsInstance(lang.slafile, str)
+                    self.assertIsInstance(lang.processorspec, str)
+                    self.assertIsInstance(lang.id, str)
+
+    def test_x86_ldefs(self):
+        """Test specific x86 language definitions."""
+        ldefs_path = PROCESSORS_DIR / "x86" / "data" / "languages" / "x86.ldefs"
+        with open(ldefs_path, encoding="utf8") as f:
+            ldefs = LanguageDefinitions.from_xml(f.read())
+
+        # Test basic properties
+        x86_default = next(ldef for ldef in ldefs.languages if ldef.variant == "default")
+        self.assertEqual(x86_default.processor, "x86")
+        self.assertEqual(x86_default.endian, "little")
+        self.assertEqual(x86_default.size, 32)
+        self.assertEqual(x86_default.id, "x86:LE:32:default")
+        self.assertEqual(x86_default.description, "Intel/AMD 32-bit x86")
+
+        # Test compiler specs
+        compiler_names = {c.name for c in x86_default.compilers}
+        self.assertIn("Visual Studio", compiler_names)
+        self.assertIn("gcc", compiler_names)
+        self.assertIn("golang", compiler_names)
+
+        vs_compiler = next(c for c in x86_default.compilers if c.name == "Visual Studio")
+        self.assertEqual(vs_compiler.spec, "x86win.cspec")
+        self.assertEqual(vs_compiler.id, "windows")
+
+        # Test external names
+        external_tools = {e.tool for e in x86_default.external_names}
+        self.assertIn("gnu", external_tools)
+        self.assertIn("IDA-PRO", external_tools)
+        self.assertIn("DWARF.register.mapping.file", external_tools)
+
+        ida_names = [e.name for e in x86_default.external_names if e.tool == "IDA-PRO"]
+        self.assertIn("metapc", ida_names)
+
+    def test_arm_ldefs(self):
+        """Test specific ARM language definitions."""
+        ldefs_path = PROCESSORS_DIR / "ARM" / "data" / "languages" / "ARM.ldefs"
+        with open(ldefs_path, encoding="utf8") as f:
+            ldefs = LanguageDefinitions.from_xml(f.read())
+
+        # Test that we have both BE and LE variants
+        variants = {ldef.endian for ldef in ldefs.languages}
+        self.assertIn("big", variants)
+        self.assertIn("little", variants)
+
+        # Test ARM v8 little endian variant
+        arm_v8_le = next(ldef for ldef in ldefs.languages if ldef.id == "ARM:LE:32:v8")
+        self.assertEqual(arm_v8_le.processor, "ARM")
+        self.assertEqual(arm_v8_le.size, 32)
+        self.assertEqual(arm_v8_le.variant, "v8")
+        self.assertEqual(arm_v8_le.slafile, "ARM8_le.sla")
+        self.assertEqual(arm_v8_le.description, "Generic ARM/Thumb v8 little endian")
+
+        # Test compilers
+        compiler_dict = {c.id: c for c in arm_v8_le.compilers}
+        self.assertIn("default", compiler_dict)
+        self.assertIn("windows", compiler_dict)
+        self.assertEqual(compiler_dict["windows"].name, "Visual Studio")
+        self.assertEqual(compiler_dict["windows"].spec, "ARM_win.cspec")
+
+        # Test external names
+        external_dict = {e.tool: e.name for e in arm_v8_le.external_names}
+        self.assertEqual(external_dict["qemu"], "qemu-arm")
+        self.assertEqual(external_dict["IDA-PRO"], "arm")
+        self.assertEqual(external_dict["DWARF.register.mapping.file"], "ARMneon.dwarf")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_pspec.py b/tests/test_pspec.py
new file mode 100644
index 0000000..d9e22e5
--- /dev/null
+++ b/tests/test_pspec.py
@@ -0,0 +1,142 @@
+import unittest
+from pathlib import Path
+
+from pypcode.pspec import ProcessorSpec
+
+# Get all .pspec files from processors directory
+PROCESSORS_DIR = Path(__file__).parent.parent / "pypcode" / "processors"
+
+
+class TestProcessorSpec(unittest.TestCase):
+    """Test cases for processor spec parsing"""
+
+    def setUp(self):
+        self.all_pspecs = list(PROCESSORS_DIR.glob("**/data/languages/*.pspec"))
+        self.assertTrue(len(self.all_pspecs) > 0, "No pspec files found")
+
+    def test_all_pspecs(self):
+        """Test that we can parse all processor specs without errors."""
+        for pspec_path in self.all_pspecs:
+            with self.subTest(pspec_path=pspec_path):
+                with open(pspec_path, encoding="utf8") as f:
+                    pspec = ProcessorSpec.from_xml(f.read())
+
+                # Basic validation of parsed data
+                if pspec.programcounter:
+                    self.assertIsInstance(pspec.programcounter, str)
+
+                if pspec.context_data:
+                    if pspec.context_data.context_set:
+                        self.assertIsInstance(pspec.context_data.context_set.space, str)
+                        self.assertIsInstance(pspec.context_data.context_set.values, dict)
+                        for k, v in pspec.context_data.context_set.values.items():
+                            self.assertIsInstance(k, str)
+                            self.assertIsInstance(v, int)
+
+                    if pspec.context_data.tracked_set:
+                        self.assertIsInstance(pspec.context_data.tracked_set.space, str)
+                        self.assertIsInstance(pspec.context_data.tracked_set.values, dict)
+                        for k, v in pspec.context_data.tracked_set.values.items():
+                            self.assertIsInstance(k, str)
+                            self.assertIsInstance(v, int)
+
+                if pspec.register_data:
+                    self.assertIsInstance(pspec.register_data.registers, list)
+                    for reg in pspec.register_data.registers:
+                        self.assertIsInstance(reg.name, str)
+                        if reg.group:
+                            self.assertIsInstance(reg.group, str)
+                        if reg.vector_lane_sizes:
+                            self.assertIsInstance(reg.vector_lane_sizes, set)
+                            for x in reg.vector_lane_sizes:
+                                self.assertIsInstance(x, int)
+                        self.assertIsInstance(reg.hidden, bool)
+
+                if pspec.default_symbols:
+                    self.assertIsInstance(pspec.default_symbols.symbols, list)
+                    for sym in pspec.default_symbols.symbols:
+                        self.assertIsInstance(sym.name, str)
+                        self.assertIsInstance(sym.address, str)
+                        self.assertIsInstance(sym.entry, bool)
+                        if sym.type:
+                            self.assertIsInstance(sym.type, str)
+
+    def test_x86_pspec(self):
+        """Test specific x86 processor spec."""
+        pspec_path = PROCESSORS_DIR / "x86" / "data" / "languages" / "x86.pspec"
+        with open(pspec_path, encoding="utf8") as f:
+            pspec = ProcessorSpec.from_xml(f.read())
+
+        # Test specific x86 properties
+        self.assertEqual(pspec.programcounter, "EIP")
+        self.assertIn("useOperandReferenceAnalyzerSwitchTables", pspec.properties.properties)
+        self.assertEqual(pspec.properties.properties["assemblyRating:x86:LE:32:default"], "GOLD")
+
+        # Test context data
+        self.assertIsNotNone(pspec.context_data)
+        self.assertIsNotNone(pspec.context_data.context_set)
+        self.assertEqual(pspec.context_data.context_set.values["addrsize"], 1)
+        self.assertEqual(pspec.context_data.context_set.values["opsize"], 1)
+
+        # Test tracked set
+        self.assertIsNotNone(pspec.context_data.tracked_set)
+        self.assertEqual(pspec.context_data.tracked_set.values["DF"], 0)
+
+        # Test register data
+        self.assertIsNotNone(pspec.register_data)
+        reg_names = {r.name for r in pspec.register_data.registers}
+
+        # Test for registers we know are explicitly defined
+        self.assertIn("XMM0", reg_names)  # AVX register
+        self.assertIn("CF", reg_names)  # Flags register
+        self.assertIn("DR0", reg_names)  # Debug register
+
+        # Test AVX registers have correct vector lane sizes
+        xmm0 = next(r for r in pspec.register_data.registers if r.name == "XMM0")
+        self.assertEqual(xmm0.vector_lane_sizes, {1, 2, 4, 8})
+        self.assertEqual(xmm0.group, "AVX")
+
+    def test_arm_cortex_pspec(self):
+        """Test specific ARM Cortex processor spec."""
+        pspec_path = PROCESSORS_DIR / "ARM" / "data" / "languages" / "ARMCortex.pspec"
+        with open(pspec_path, encoding="utf8") as f:
+            pspec = ProcessorSpec.from_xml(f.read())
+
+        # Test ARM Cortex specific properties
+        self.assertEqual(pspec.programcounter, "pc")
+        self.assertEqual(pspec.properties.properties["addressesDoNotAppearDirectlyInCode"], "true")
+
+        # Test context data
+        self.assertIsNotNone(pspec.context_data)
+        self.assertIsNotNone(pspec.context_data.context_set)
+        self.assertEqual(pspec.context_data.context_set.values["TMode"], 1)
+        self.assertIn("THUMB", pspec.context_data.context_set.description["TMode"])
+
+        # Test default symbols
+        self.assertIsNotNone(pspec.default_symbols)
+        sym_dict = {s.name: s for s in pspec.default_symbols.symbols}
+        self.assertIn("Reset", sym_dict)
+        self.assertEqual(sym_dict["Reset"].address, "ram:0x4")
+        self.assertTrue(sym_dict["Reset"].entry)
+        self.assertEqual(sym_dict["Reset"].type, "code_ptr")
+
+    def test_unknown_elements(self):
+        """Test that unknown XML elements don't cause errors."""
+        test_xml = """
+        <processor_spec>
+          <properties>
+            <property key="test" value="value"/>
+          </properties>
+          <unknown_element>
+            <nested_unknown>test</nested_unknown>
+          </unknown_element>
+          <programcounter register="pc"/>
+        </processor_spec>
+        """
+        pspec = ProcessorSpec.from_xml(test_xml)
+        self.assertEqual(pspec.properties.properties["test"], "value")
+        self.assertEqual(pspec.programcounter, "pc")
+
+
+if __name__ == "__main__":
+    unittest.main()
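
Reviewer note (not part of the patch): a minimal usage sketch of the structured spec API introduced above, assuming a source checkout with the bundled processor specs on disk; the cspec path and the expected "RSP" value are taken from the tests in this diff.

from pathlib import Path

from pypcode import Arch
from pypcode.cspec import CompilerSpec

# Enumerate architectures through the LanguageDefinitions-backed Arch class.
for arch in Arch.enumerate():
    for lang in arch.languages:
        print(lang.id, lang.description)

# Parse a compiler spec directly from its XML source.
cspec_path = Path("pypcode/processors/x86/data/languages/x86-64-gcc.cspec")
cspec = CompilerSpec.from_xml(cspec_path.read_text(encoding="utf8"))
print(cspec.stackpointer_register)  # "RSP", per test_x86_64_gcc_cspec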