Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Keynote 13.1 support. #53

Merged
merged 5 commits into from
Jun 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dumper/Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@

.PHONY=clean all

LLDB_PYTHON_PATH := ${shell lldb --python-path}
LLDB_PYTHON := ${shell lldb --python-path | cut -f 8 -d /}
LLDB_PYTHON_PATH := /opt/homebrew/opt/llvm//libexec/python3.11/site-packages/
LLDB_PYTHON := python3.11
IDENTITY := $(shell security find-identity -v -p codesigning | head -n 1 | python -c 'import sys; print(sys.stdin.read().split("\"")[1])')

all: mapping.py proto
Expand Down
7 changes: 5 additions & 2 deletions dumper/extract_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
# let's break in the CloudKit code and early exit the function before it can raise an exception:
target.BreakpointCreateByName("[CKContainer containerWithIdentifier:]")
# In later Keynote versions, 'containerWithIdentifier' isn't called directly, but we can break on similar methods:
# Note: this __lldb_unnamed_symbol index was determined by painstaking experimentation. It will break again for sure.
target.BreakpointCreateByName("___lldb_unnamed_symbol2482", "CloudKit")
# Note: this __lldb_unnamed_symbol hack was determined by painstaking experimentation. It will break again for sure.
target.BreakpointCreateByRegex("___lldb_unnamed_symbol[0-9]+", "CloudKit")

process = target.LaunchSimple(None, None, os.getcwd())

Expand All @@ -40,6 +40,9 @@
process.Continue()
else:
break
elif thread.GetStopReason() == lldb.eStopReasonException:
sys.stderr.write(repr(thread) + "\n")
raise NotImplementedError(f"LLDB caught exception, {__file__} needs to be updated to handle.")
if process.GetState() == lldb.eStateStopped:
if thread:
frame = thread.GetFrameAtIndex(0)
Expand Down
167 changes: 153 additions & 14 deletions dumper/protodump.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,150 @@
Inspired by Sean Patrick O'Brien (@obriensp)'s 2013 "proto-dump": https://github.com/obriensp/proto-dump
"""

import sys
from pathlib import Path
from tqdm import tqdm
from typing import List
from collections import defaultdict

from google.protobuf.internal.decoder import _DecodeVarint, SkipField
from google.protobuf import descriptor_pb2
from google.protobuf.descriptor_pool import DescriptorPool
from google.protobuf.message import DecodeError
from google.protobuf.internal import api_implementation


PROTO_TYPES = {
1: 'double',
2: 'float',
3: 'int64',
4: 'uint64',
5: 'int32',
6: 'fixed64',
7: 'fixed32',
8: 'bool',
9: 'string',
12: 'bytes',
13: 'uint32',
15: 'sfixed32',
16: 'sfixed64',
17: 'sint32',
18: 'sint64',
}

def to_proto_file(fds: descriptor_pb2.FileDescriptorSet) -> str:
    """Render a FileDescriptorSet holding exactly one file as proto2 source.

    Args:
        fds: a FileDescriptorSet whose ``file`` list contains a single
            FileDescriptorProto to pretty-print.

    Returns:
        The reconstructed ``.proto`` source text.

    Raises:
        NotImplementedError: if ``fds`` holds more than one file, or a field
            uses a label/type/feature this renderer does not understand —
            failing loudly beats emitting a corrupt .proto file.
    """
    if len(fds.file) != 1:
        raise NotImplementedError("Only one file per fds.")
    f = fds.file[0]
    lines = [
        "syntax = \"proto2\";",
        ""
    ]

    for dependency in f.dependency:
        lines.append(f'import "{dependency}";')

    # An empty package name would render as the invalid statement "package ;",
    # so only emit the declaration when a package is actually set.
    if f.package:
        lines.append(f'package {f.package};')
        lines.append("")

    def generate_enum_lines(f, lines: List[str], indent: int = 0):
        # Emit `enum Name { VALUE = N; ... }` for every enum declared on `f`
        # (a file or message descriptor).
        prefix = " " * indent
        for enum in f.enum_type:
            lines.append(prefix + f"enum {enum.name} " + '{')
            for value in enum.value:
                lines.append(prefix + f" {value.name} = {value.number};")
            lines.append(prefix + '}')

    def generate_field_line(field, in_oneof: bool = False) -> str:
        # Render a single field declaration, e.g.
        # ` optional int32 foo = 1 [packed = true];`
        line = []
        if field.label == 1:
            # Fields inside a oneof are implicitly optional; the keyword is
            # not permitted there.
            if not in_oneof:
                line.append("optional")
        elif field.label == 2:
            line.append("required")
        elif field.label == 3:
            line.append("repeated")
        else:
            raise NotImplementedError("Unknown field label type!")

        if field.type in PROTO_TYPES:
            line.append(PROTO_TYPES[field.type])
        elif field.type == 11 or field.type == 14:
            # MESSAGE (11) or ENUM (14): refer to the type by name.
            line.append(field.type_name)
        else:
            raise NotImplementedError(f"Unknown field type {field.type}!")

        line.append(field.name)
        line.append("=")
        line.append(str(field.number))
        options = []
        if field.default_value:
            options.append(f"default = {field.default_value}")
        if field.options.deprecated:
            options.append("deprecated = true")
        if field.options.packed:
            options.append("packed = true")
        # TODO: Protobuf supports other options in square brackets!
        # Add support for them here to make this feature-complete.
        if options:
            line.append(f"[{', '.join(options)}]")
        return f" {' '.join(line)};"

    def generate_extension_lines(message, lines: List[str], indent: int = 0):
        # Emit `extend Extendee { ... }` blocks, grouping this descriptor's
        # extension fields by the message they extend.
        prefix = " " * indent
        extensions_grouped_by_extendee = defaultdict(list)
        for extension in message.extension:
            extensions_grouped_by_extendee[extension.extendee].append(extension)
        for extendee, extensions in extensions_grouped_by_extendee.items():
            lines.append(prefix + f"extend {extendee} {{")
            for extension in extensions:
                lines.append(prefix + generate_field_line(extension))
            lines.append(prefix + "}")

    def generate_message_lines(f, lines: List[str], indent: int = 0):
        # Recursively emit `message Name { ... }` blocks: nested enums and
        # messages first, then plain fields, oneofs, extension ranges, and
        # extensions declared inside the message.
        prefix = " " * indent

        # A file descriptor exposes its messages as `message_type`; a message
        # descriptor exposes its children as `nested_type`.
        submessages = f.message_type if hasattr(f, 'message_type') else f.nested_type

        for message in submessages:
            lines.append(prefix + f"message {message.name} " + '{')

            generate_enum_lines(message, lines, indent + 1)
            generate_message_lines(message, lines, indent + 1)

            # Plain (non-oneof) fields first...
            for field in message.field:
                if not field.HasField("oneof_index"):
                    lines.append(prefix + generate_field_line(field))

            # ...then the oneofs:
            next_prefix = " " * (indent + 1)
            for oneof_index, oneof in enumerate(message.oneof_decl):
                lines.append(next_prefix + f"oneof {oneof.name} {{")
                for field in message.field:
                    if field.HasField("oneof_index") and field.oneof_index == oneof_index:
                        lines.append(next_prefix + generate_field_line(field, in_oneof=True))
                lines.append(next_prefix + "}")

            if len(message.extension_range):
                if len(message.extension_range) > 1:
                    raise NotImplementedError("Not sure how to handle multiple extension ranges!")
                # ExtensionRange.end is *exclusive* in descriptor.proto, but
                # proto2's `extensions A to B;` syntax is *inclusive*: subtract
                # one, clamped to the maximum field number (2**29 - 1).
                start, end = (
                    message.extension_range[0].start,
                    min(message.extension_range[0].end - 1, 536870911)
                )
                lines.append(next_prefix + f"extensions {start} to {end};")

            generate_extension_lines(message, lines, indent + 1)
            lines.append(prefix + '}')
            lines.append('')

    generate_enum_lines(f, lines)
    generate_message_lines(f, lines)
    generate_extension_lines(f, lines)

    return "\n".join(lines)


class ProtoFile(object):
Expand All @@ -39,19 +175,24 @@ def attempt_to_load(self):
try:
return self.pool.Add(self.file_descriptor_proto)
except Exception as e:
if "duplicate file name" in str(e):
return self.pool.FindFileByName(e.args[0].split("duplicate file name")[1].strip())
return None

@property
def descriptor(self):
return self.attempt_to_load()

def __repr__(self):
return "<%s: path=\"%s\">" % (self.__class__.__name__, self.path)
return '<%s: path="%s">' % (self.__class__.__name__, self.path)

@property
def source(self):
if self.descriptor:
return self.descriptor.GetDebugString()
fds = descriptor_pb2.FileDescriptorSet()
fds.file.append(descriptor_pb2.FileDescriptorProto())
fds.file[0].ParseFromString(self.descriptor.serialized_pb)
return to_proto_file(fds)
return None


Expand All @@ -77,19 +218,19 @@ def read_until_null_tag(data):


def extract_proto_from_file(filename, descriptor_pool):
with open(filename, 'rb') as f:
with open(filename, "rb") as f:
data = f.read()
offset = 0

PROTO_MARKER = b'.proto'
PROTO_MARKER = b".proto"

while True:
# Look for ".proto"
suffix_position = data.find(PROTO_MARKER, offset)
if suffix_position == -1:
break

marker_start = data.rfind(b'\x0A', offset, suffix_position)
marker_start = data.rfind(b"\x0A", offset, suffix_position)
if marker_start == -1:
# Doesn't look like a proto descriptor
offset = suffix_position + len(PROTO_MARKER)
Expand Down Expand Up @@ -163,16 +304,9 @@ def main():
parser.add_argument("output_path", help="Output directory to dump .protoc files to.")

args = parser.parse_args()

if api_implementation.Type() != "cpp":
raise NotImplementedError(
"This script requires the Protobuf installation to use the C++ implementation. Please"
" reinstall Protobuf with C++ support."
)

GLOBAL_DESCRIPTOR_POOL = DescriptorPool()

all_filenames = [str(path) for path in Path(args.input_path).rglob('*') if not path.is_dir()]
all_filenames = [str(path) for path in Path(args.input_path).rglob("*") if not path.is_dir()]

print(
f"Scanning {len(all_filenames):,} files under {args.input_path} for protobuf definitions..."
Expand All @@ -190,11 +324,16 @@ def main():
if not found.attempt_to_load():
missing_deps.update(find_missing_dependencies(proto_files_found, found.path))

for found in proto_files_found:
if not found.attempt_to_load():
missing_deps.add(found)

if missing_deps:
print(
f"Unable to print out all Protobuf definitions; {len(missing_deps):,} proto files could"
f" not be found:\n{missing_deps}"
)
sys.exit(1)
else:
for proto_file in tqdm(proto_files_found):
Path(args.output_path).mkdir(parents=True, exist_ok=True)
Expand Down
2 changes: 1 addition & 1 deletion keynote_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
__major_version__ = 1
__patch_version__ = 0
__supported_keynote_version__ = keynote_parser.macos_app_version.MacOSAppVersion(
"12.2.1", "7035.0.161", "1A165"
"13.1", "7037.0.101", "1A98"
)
__version_tuple__ = (
__major_version__,
Expand Down
Loading