Skip to content

Commit

Permalink
BUG: Title sometimes is bytes and not str.
Browse files Browse the repository at this point in the history
  • Loading branch information
reformy committed Nov 1, 2024
1 parent 98aa974 commit c31933b
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 0 deletions.
8 changes: 8 additions & 0 deletions pypdf/generic/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,14 @@ def create_string_object(
# version if possible... and the only way to check if that's
# possible is to try.
# Some strings are strings, some are just byte arrays.
try:
text = string.decode("utf-8")
retval = TextStringObject(text)
retval._original_bytes = string
return retval
except UnicodeDecodeError:
pass

retval = TextStringObject(decode_pdfdocencoding(string))
retval._original_bytes = string
retval.autodetect_pdfdocencoding = True
Expand Down
Binary file added resources/bytes.pdf
Binary file not shown.
9 changes: 9 additions & 0 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,18 @@ def test_read_metadata(pdf_path, expected):
docinfo.modification_date
docinfo.modification_date_raw
if "/Title" in metadict:
assert isinstance(docinfo.title, str)
assert metadict["/Title"] == docinfo.title


def test_read_metadata_title_is_bytes():
    """Check that the document title of ``bytes.pdf`` is read as the expected text."""
    pdf_file = RESOURCE_ROOT / "bytes.pdf"
    with open(pdf_file, "rb") as stream:
        document_title = PdfReader(stream).metadata.title
        # The title should come back as a str matching the text below.
        assert document_title == "Microsoft Word - トランスバース社買収電話会議英語Final.docx"


def test_iss1943():
with PdfReader(RESOURCE_ROOT / "crazyones.pdf") as reader:
docinfo = reader.metadata
Expand Down

0 comments on commit c31933b

Please sign in to comment.