Skip to content

Commit

Permalink
Implement Python types for XML SLEIGH files
Browse files Browse the repository at this point in the history
  • Loading branch information
twizmwazin committed Dec 14, 2024
1 parent a245227 commit 4acdee9
Show file tree
Hide file tree
Showing 7 changed files with 861 additions and 31 deletions.
72 changes: 41 additions & 31 deletions pypcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,17 @@
)

from .pypcode_native import Context as _Context # pylint:disable=no-name-in-module

from .cspec import CompilerSpec
from .pspec import ProcessorSpec
from .ldefs import LanguageDefinitions, Language

__all__ = [
"Address",
"AddrSpace",
"Arch",
"ArchLanguage",
"BadDataError",
"CompilerSpec",
"Context",
"DecoderError",
"Disassembly",
Expand All @@ -52,6 +55,7 @@
"OpFormatUnary",
"PcodeOp",
"PcodePrettyPrinter",
"ProcessorSpec",
"TranslateFlags",
"Translation",
"UnimplError",
Expand Down Expand Up @@ -84,62 +88,63 @@ class ArchLanguage:
)

archdir: str
ldef: ET.Element
ldef: Language

def __init__(self, archdir: str, ldef: ET.Element):
def __init__(self, archdir: str, ldef: Language):
self.archdir = archdir
self.ldef = ldef
self._pspec: Optional[ET.Element] = None
self._cspecs: Optional[Dict[Tuple[str, str], ET.Element]] = None
self._pspec: Optional[ProcessorSpec] = None
self._cspecs: Optional[Dict[Tuple[str, str], CompilerSpec]] = None

@property
def pspec_path(self) -> str:
return os.path.join(self.archdir, self.processorspec)
return os.path.join(self.archdir, self.ldef.processorspec)

@property
def slafile_path(self) -> str:
return os.path.join(self.archdir, self.slafile)
return os.path.join(self.archdir, self.ldef.slafile)

@property
def description(self) -> str:
elem = self.ldef.find("description")
if elem is not None:
return elem.text or ""
return ""
return self.ldef.description or ""

def __getattr__(self, key):
if key in self.ldef.attrib:
return self.ldef.attrib[key]
raise AttributeError(key)
return getattr(self.ldef, key)

@property
def pspec(self) -> Optional[ET.Element]:
def pspec(self) -> Optional[ProcessorSpec]:
if self._pspec is None:
self._pspec = ET.parse(self.pspec_path).getroot()
try:
root = ET.parse(self.pspec_path).getroot()
self._pspec = ProcessorSpec.from_element(root)
except Exception:
return None
return self._pspec

@property
def cspecs(self) -> Mapping[Tuple[str, str], ET.Element]:
def cspecs(self) -> Mapping[Tuple[str, str], CompilerSpec]:
if self._cspecs is None:
self._cspecs = {}
for e in self.ldef.findall("compiler"):
path = os.path.join(self.archdir, e.attrib["spec"])
cspec = ET.parse(path).getroot()
self._cspecs[(e.attrib["id"], e.attrib["name"])] = cspec
for e in self.ldef.compilers:
path = os.path.join(self.archdir, e.spec)
root = ET.parse(path).getroot()
cspec = CompilerSpec.from_element(root)
self._cspecs[(e.id, e.name)] = cspec
return self._cspecs

def init_context_from_pspec(self, ctx: "Context") -> None:
if self.pspec is None:
return
cd = self.pspec.find("context_data")
if cd is None:

if self.pspec.context_data is None:
return
cs = cd.find("context_set")
if cs is None:

context_set = self.pspec.context_data.context_set
if context_set is None:
return
for e in cs:
assert e.tag == "set"
ctx.setVariableDefault(e.attrib["name"], int(e.attrib["val"]))

for name, value in context_set.values.items():
ctx.setVariableDefault(name, value)

@classmethod
def from_id(cls, langid: str) -> Optional["ArchLanguage"]:
Expand Down Expand Up @@ -169,7 +174,7 @@ class Arch:
archpath: str
archname: str
ldefpath: str
ldef: ET.ElementTree
ldef: LanguageDefinitions
languages: Sequence[ArchLanguage]

def __init__(self, name: str, ldefpath: str):
Expand All @@ -182,8 +187,13 @@ def __init__(self, name: str, ldefpath: str):
self.archpath = os.path.dirname(ldefpath)
self.archname = name
self.ldefpath = ldefpath
self.ldef = ET.parse(ldefpath)
self.languages = [ArchLanguage(self.archpath, e) for e in self.ldef.getroot()]

# Parse ldefs file into structured format
with open(ldefpath, "r") as f:
self.ldef = LanguageDefinitions.from_xml(f.read())

# Create ArchLanguage objects from structured data
self.languages = [ArchLanguage(self.archpath, lang) for lang in self.ldef.languages]

@classmethod
def enumerate(cls) -> Generator["Arch", None, None]:
Expand Down
137 changes: 137 additions & 0 deletions pypcode/cspec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Dict, List, Optional
from xml.etree import ElementTree as ET


@dataclass
class DataOrganization:
    """Sizes and alignments read from a ``<data_organization>`` element.

    Every integer field is ``None`` when the element does not provide it.
    """

    absolute_max_alignment: Optional[int] = None
    machine_alignment: Optional[int] = None
    default_alignment: Optional[int] = None
    default_pointer_alignment: Optional[int] = None
    wchar_size: Optional[int] = None
    short_size: Optional[int] = None
    integer_size: Optional[int] = None
    long_size: Optional[int] = None
    long_long_size: Optional[int] = None
    float_size: Optional[int] = None
    double_size: Optional[int] = None
    long_double_size: Optional[int] = None
    # Maps a size to its alignment, one item per <entry> of <size_alignment_map>.
    size_alignment_map: Dict[int, int] = field(default_factory=dict)
    # True only when <bitfield_packing><use_MS_convention value="true"/> is present.
    bitfield_packing_uses_ms: bool = False

    @classmethod
    def from_element(cls, element: ET.Element) -> DataOrganization:
        """Build a ``DataOrganization`` from its XML element.

        Passing ``None`` yields an instance with all defaults. Each integer
        setting is read from an attribute of ``element`` when present,
        otherwise from the ``value`` attribute of a same-named child.
        """
        if element is None:
            return cls()

        int_fields = (
            "absolute_max_alignment",
            "machine_alignment",
            "default_alignment",
            "default_pointer_alignment",
            "wchar_size",
            "short_size",
            "integer_size",
            "long_size",
            "long_long_size",
            "float_size",
            "double_size",
            "long_double_size",
        )

        def read_int(name: str) -> Optional[int]:
            # An attribute on the element itself takes precedence over a child.
            raw = element.attrib.get(name)
            if raw is not None:
                return int(raw)
            node = element.find(name)
            if node is None or "value" not in node.attrib:
                return None
            return int(node.attrib["value"])

        map_node = element.find("size_alignment_map")
        if map_node is None:
            size_to_alignment = {}
        else:
            size_to_alignment = {
                int(item.attrib["size"]): int(item.attrib["alignment"])
                for item in map_node.findall("entry")
            }

        packing_node = element.find("bitfield_packing")
        ms_node = None if packing_node is None else packing_node.find("use_MS_convention")
        # A missing "value" attribute counts as "false"; comparison is case-insensitive.
        ms_convention = ms_node is not None and ms_node.attrib.get("value", "false").lower() == "true"

        int_values = {name: read_int(name) for name in int_fields}
        return cls(
            size_alignment_map=size_to_alignment,
            bitfield_packing_uses_ms=ms_convention,
            **int_values,
        )


@dataclass
class GlobalScope:
    """Contents of a ``<global>`` element: a RAM flag and register names."""

    # True when at least one <range> child has space="ram".
    ram_present: bool = False
    # The "name" attribute of every <register> child, in document order.
    registers: List[str] = field(default_factory=list)

    @classmethod
    def from_element(cls, element: ET.Element) -> GlobalScope:
        """Build a ``GlobalScope`` from its XML element.

        Passing ``None`` yields the default (no RAM range, no registers).
        """
        if element is None:
            return cls()

        has_ram = False
        for range_node in element.findall("range"):
            if range_node.attrib.get("space", "") == "ram":
                has_ram = True
                break

        register_names = []
        for register_node in element.findall("register"):
            register_names.append(register_node.attrib["name"])

        return cls(ram_present=has_ram, registers=register_names)


@dataclass
class CompilerSpec:
    """Typed view of selected parts of a compiler specification document.

    Only the ``<data_organization>``, ``<global>``, ``<stackpointer>`` and
    ``<returnaddress>`` children of the root element are interpreted.
    """

    data_organization: DataOrganization = field(default_factory=DataOrganization)
    global_scope: GlobalScope = field(default_factory=GlobalScope)
    # "register" attribute of <stackpointer>, or None when the element is absent.
    stackpointer_register: Optional[str] = None
    # Set when <returnaddress> names a register; the varnode fields stay None then.
    returnaddress_register: Optional[str] = None
    # Set when <returnaddress> holds a <varnode> (space / offset / size).
    returnaddress_space: Optional[str] = None
    returnaddress_offset: Optional[int] = None
    returnaddress_size: Optional[int] = None

    @classmethod
    def from_xml(cls, xml_string: str) -> CompilerSpec:
        """Parse ``xml_string`` and build a ``CompilerSpec`` from its root element."""
        return cls.from_element(ET.fromstring(xml_string))

    @staticmethod
    def _parse_int(text: str) -> int:
        """Convert ``text`` to int, accepting decimal or prefixed (e.g. ``0x10``) forms."""
        try:
            return int(text)
        except ValueError:
            # Base 0 lets Python infer the radix from a 0x / 0o / 0b prefix.
            return int(text, 0)

    @classmethod
    def from_element(cls, element: ET.Element) -> CompilerSpec:
        """Build a ``CompilerSpec`` from the root element of a compiler spec.

        Missing children fall back to the field defaults.

        The return-address register may be given either as a ``register``
        attribute on ``<returnaddress>`` or as a nested
        ``<register name="..."/>`` child; previously only the attribute form
        was recognised. If neither is present, a nested ``<varnode>`` supplies
        the space, offset and size.

        Raises:
            KeyError: if ``<stackpointer>`` lacks ``register`` or a
                ``<varnode>`` lacks ``space``, ``offset`` or ``size``.
            ValueError: if a varnode ``offset`` or ``size`` is not an integer.
        """
        data_org_elem = element.find("data_organization")
        if data_org_elem is not None:
            data_org = DataOrganization.from_element(data_org_elem)
        else:
            data_org = DataOrganization()

        global_elem = element.find("global")
        if global_elem is not None:
            global_scope = GlobalScope.from_element(global_elem)
        else:
            global_scope = GlobalScope()

        sp_elem = element.find("stackpointer")
        stackpointer = sp_elem.attrib["register"] if sp_elem is not None else None

        retaddr_reg = retaddr_space = None
        retaddr_offset = retaddr_size = None
        ret_elem = element.find("returnaddress")
        if ret_elem is not None:
            reg_child = ret_elem.find("register")
            if "register" in ret_elem.attrib:
                # Attribute form, kept for backward compatibility.
                retaddr_reg = ret_elem.attrib["register"]
            elif reg_child is not None and "name" in reg_child.attrib:
                # Child-element form: <returnaddress><register name="lr"/></returnaddress>
                retaddr_reg = reg_child.attrib["name"]
            else:
                var_elem = ret_elem.find("varnode")
                if var_elem is not None:
                    retaddr_space = var_elem.attrib["space"]
                    retaddr_offset = cls._parse_int(var_elem.attrib["offset"])
                    retaddr_size = cls._parse_int(var_elem.attrib["size"])

        return cls(
            data_organization=data_org,
            global_scope=global_scope,
            stackpointer_register=stackpointer,
            returnaddress_register=retaddr_reg,
            returnaddress_space=retaddr_space,
            returnaddress_offset=retaddr_offset,
            returnaddress_size=retaddr_size,
        )
78 changes: 78 additions & 0 deletions pypcode/ldefs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import List, Optional
from xml.etree import ElementTree as ET


@dataclass
class ExternalName:
    """One ``<external_name>`` entry: a name under which a tool knows the language."""

    tool: str
    name: str

    @classmethod
    def from_element(cls, element: ET.Element) -> ExternalName:
        """Build an ``ExternalName`` from the element's ``tool`` and ``name`` attributes.

        Raises:
            KeyError: if either attribute is missing.
        """
        attrs = element.attrib
        tool_id = attrs["tool"]
        external = attrs["name"]
        return cls(tool=tool_id, name=external)


@dataclass
class Compiler:
    """One ``<compiler>`` entry of a language: display name, spec file and id."""

    name: str
    spec: str
    id: str

    @classmethod
    def from_element(cls, element: ET.Element) -> Compiler:
        """Build a ``Compiler`` from the element's ``name``, ``spec`` and ``id`` attributes.

        Raises:
            KeyError: if any of the three attributes is missing.
        """
        attrs = element.attrib
        display_name = attrs["name"]
        spec_file = attrs["spec"]
        compiler_id = attrs["id"]
        return cls(name=display_name, spec=spec_file, id=compiler_id)


@dataclass
class Language:
    """One ``<language>`` entry of a language-definitions document."""

    processor: str
    endian: str
    size: int
    variant: str
    version: str
    slafile: str
    processorspec: str
    id: str
    description: Optional[str] = None
    manualindexfile: Optional[str] = None
    instructionEndian: Optional[str] = None
    compilers: List[Compiler] = field(default_factory=list)
    external_names: List[ExternalName] = field(default_factory=list)

    @classmethod
    def from_element(cls, element: ET.Element) -> Language:
        """Build a ``Language`` from a ``<language>`` element.

        ``description`` is the text of the ``<description>`` child, or ``""``
        when that child is absent. ``manualindexfile`` and
        ``instructionEndian`` are optional attributes; every other scalar
        field is a required attribute.

        Raises:
            KeyError: if a required attribute is missing.
            ValueError: if ``size`` is not an integer.
        """
        summary_node = element.find("description")
        if summary_node is None:
            summary = ""
        else:
            summary = summary_node.text

        attrs = element.attrib
        # Required attributes are read in declaration order so that the first
        # problem in a malformed element is the one reported.
        processor = attrs["processor"]
        endian = attrs["endian"]
        size = int(attrs["size"])
        variant = attrs["variant"]
        version = attrs["version"]
        slafile = attrs["slafile"]
        processorspec = attrs["processorspec"]
        language_id = attrs["id"]

        manual_index = attrs.get("manualindexfile")
        instruction_endian = attrs.get("instructionEndian")

        compiler_list = []
        for node in element.findall("compiler"):
            compiler_list.append(Compiler.from_element(node))

        external_list = []
        for node in element.findall("external_name"):
            external_list.append(ExternalName.from_element(node))

        return cls(
            processor=processor,
            endian=endian,
            size=size,
            variant=variant,
            version=version,
            slafile=slafile,
            processorspec=processorspec,
            id=language_id,
            description=summary,
            manualindexfile=manual_index,
            instructionEndian=instruction_endian,
            compilers=compiler_list,
            external_names=external_list,
        )


@dataclass
class LanguageDefinitions:
    """All ``<language>`` entries found directly under a definitions root element."""

    languages: List[Language] = field(default_factory=list)

    @classmethod
    def from_xml(cls, xml_string: str) -> LanguageDefinitions:
        """Parse ``xml_string`` and build the definitions from its root element."""
        return cls.from_element(ET.fromstring(xml_string))

    @classmethod
    def from_element(cls, element: ET.Element) -> LanguageDefinitions:
        """Collect every direct ``<language>`` child of ``element``, in document order."""
        parsed = []
        for node in element.findall("language"):
            parsed.append(Language.from_element(node))
        return cls(languages=parsed)
Loading

0 comments on commit 4acdee9

Please sign in to comment.