From ed8ea380cbf6cc69468b7a7f46dcf4a177cf7ef3 Mon Sep 17 00:00:00 2001 From: yzqzss Date: Tue, 9 Jul 2024 18:36:28 +0800 Subject: [PATCH] add `dryrun` option to `truncateXMLDump()` --- wikiteam3/dumpgenerator/dump/xmldump/xml_truncate.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/wikiteam3/dumpgenerator/dump/xmldump/xml_truncate.py b/wikiteam3/dumpgenerator/dump/xmldump/xml_truncate.py index efb446c2..6d4f1dad 100644 --- a/wikiteam3/dumpgenerator/dump/xmldump/xml_truncate.py +++ b/wikiteam3/dumpgenerator/dump/xmldump/xml_truncate.py @@ -24,8 +24,12 @@ def addNewline(filename: str) -> None: f.write("\n") -def truncateXMLDump(filename: str) -> str: - """Removes incomplete elements from the end of XML dump files""" +def truncateXMLDump(filename: str, dryrun: bool = False) -> str: + """ + Removes incomplete elements from the end of XML dump files + + dryrun: bool - returns the incomplete segment without truncating the file + """ with FileReadBackwards(filename, encoding="utf-8") as frb: incomplete_segment: str = "" @@ -36,6 +40,8 @@ def truncateXMLDump(filename: str) -> str: while xml_line and "</page>" not in xml_line: incomplete_segment = xml_line + incomplete_segment xml_line = frb.readline() + if dryrun: + return incomplete_segment incomplete_segment_size = len(incomplete_segment.encode("utf-8")) file_size = os.path.getsize(filename) if file_size > incomplete_segment_size: