diff --git a/README.md b/README.md index f87068bab..5227957fb 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ An [Apache PDFBox](https://github.com/apache/pdfbox) fork intended to be used as What's different from PDFBox? --------- -+ Requires JDK 17 ++ Requires JDK 21 + Lazy loading/parsing of PDF objects. Only the document xref table(s)/stream(s) is(are) initially parsed and information to lookup objects are retrieved, when later a PDF object is requested, the object is retrieve/parsed using the lookup information. This allows minimal memory footprint when you only need part of the document (Ex. you only need the information dictionary or the number of pages of the document). + Multiple I/O implementations to read from. SAMBox uses [Sejda-io](https://github.com/torakiki/sejda-io) allowing to use one of the provided implementation based on `java.nio.channels.FileChannel`, `java.io.InputStream` and `java.nio.MappedByteBuffer` (buffered or not). + Minimized GC through the use of a pool of `java.lang.StringBuilder`. 
diff --git a/pom.xml b/pom.xml index ceb281ae7..237754bd4 100644 --- a/pom.xml +++ b/pom.xml @@ -1,5 +1,7 @@ - + 4.0.0 org.sejda sambox @@ -56,6 +58,7 @@ false 1.78.1 2.0.28 + 3.0.2 2.0.0 3.0.1 2.0.13 @@ -1529,10 +1532,14 @@ wget - https://issues.apache.org/jira/secure/attachment/13047577/PDFBOX-5484.ttf + + https://issues.apache.org/jira/secure/attachment/13047577/PDFBOX-5484.ttf + ${project.build.directory}/fonts PDFBOX-5484.ttf - 7c3d8bbc18654315d6341a277dcd5c66218b95c43baf190b6e32f77817d17bab421ef76f2c904b46c97f84c49b00d58525449cff970897010534d6aa2812a4e2 + + 7c3d8bbc18654315d6341a277dcd5c66218b95c43baf190b6e32f77817d17bab421ef76f2c904b46c97f84c49b00d58525449cff970897010534d6aa2812a4e2 + @@ -1572,6 +1579,17 @@ + + org.apache.pdfbox + xmpbox + ${xmpbox.version} + + + commons-logging + commons-logging + + + org.bouncycastle bcmail-jdk18on diff --git a/src/main/java/module-info.java b/src/main/java/module-info.java index 8411d4807..d88b10424 100644 --- a/src/main/java/module-info.java +++ b/src/main/java/module-info.java @@ -27,6 +27,7 @@ requires transitive java.xml; requires transitive org.apache.fontbox; requires transitive org.sejda.io; + requires transitive org.apache.xmpbox; exports org.sejda.sambox; exports org.sejda.sambox.contentstream; diff --git a/src/main/java/org/sejda/sambox/output/WriteOption.java b/src/main/java/org/sejda/sambox/output/WriteOption.java index 1ff15260a..ef52cf5ee 100644 --- a/src/main/java/org/sejda/sambox/output/WriteOption.java +++ b/src/main/java/org/sejda/sambox/output/WriteOption.java @@ -18,7 +18,7 @@ /** * Options that can be selected when writing a PDF document. - * + * * @author Andrea Vacondio */ public enum WriteOption @@ -42,5 +42,14 @@ public enum WriteOption /** * Does not automatically update metadata modified date and producer when saving */ - NO_METADATA_PRODUCER_MODIFIED_DATE_UPDATE + NO_METADATA_PRODUCER_MODIFIED_DATE_UPDATE, + /** + * It creates or updates the document XMP metadata before the document is written. 
+ *
    + *
  • Creates: if the document XMP metadata does not exist, it creates a new one based on the info dictionary
  • + *
  • Updates: if the document XMP metadata exists, it updates all the values corresponding to the info dictionary (see ISO 32000-2:2020 Chap 14.3.3 Table 349)
  • + *
+ * Note: we currently leave untouched a malformed metadata stream. + */ + UPSERT_DOCUMENT_METADATA_STREAM } diff --git a/src/main/java/org/sejda/sambox/pdmodel/PDDocument.java b/src/main/java/org/sejda/sambox/pdmodel/PDDocument.java index 74210ae16..26005e4e6 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/PDDocument.java +++ b/src/main/java/org/sejda/sambox/pdmodel/PDDocument.java @@ -27,6 +27,8 @@ import java.awt.Point; import java.awt.image.DataBuffer; import java.awt.image.Raster; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; import java.io.Closeable; import java.io.File; import java.io.IOException; @@ -41,6 +43,10 @@ import java.util.Set; import org.apache.fontbox.ttf.TrueTypeFont; +import org.apache.xmpbox.XMPMetadata; +import org.apache.xmpbox.xml.DomXmpParser; +import org.apache.xmpbox.xml.XmpParsingException; +import org.apache.xmpbox.xml.XmpSerializer; import org.sejda.commons.util.IOUtils; import org.sejda.io.CountingWritableByteChannel; import org.sejda.io.SeekableSources; @@ -61,6 +67,7 @@ import org.sejda.sambox.output.PDDocumentWriter; import org.sejda.sambox.output.PreSaveCOSTransformer; import org.sejda.sambox.output.WriteOption; +import org.sejda.sambox.pdmodel.common.PDMetadata; import org.sejda.sambox.pdmodel.encryption.AccessPermission; import org.sejda.sambox.pdmodel.encryption.PDEncryption; import org.sejda.sambox.pdmodel.encryption.SecurityHandler; @@ -68,6 +75,7 @@ import org.sejda.sambox.pdmodel.graphics.color.PDDeviceRGB; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.xml.transform.TransformerException; /** * This is the in-memory representation of the PDF document. 
@@ -535,14 +543,27 @@ private void writeTo(CountingWritableByteChannel output, StandardSecurity securi { requireOpen(); - if (Arrays.stream(options) - .noneMatch(i -> i == WriteOption.NO_METADATA_PRODUCER_MODIFIED_DATE_UPDATE)) + updateMetadata(options); + subsetFonts(); + + EncryptionContext encryptionContext = ofNullable(security).map(EncryptionContext::new) + .orElse(null); + generateFileIdentifier(output.toString().getBytes(StandardCharsets.ISO_8859_1), + encryptionContext); + try (PDDocumentWriter writer = new PDDocumentWriter(output, encryptionContext, + preSaveCOSTransformer, options)) { - // update producer and last modification date only if the write option doesn't state otherwise - getDocumentInformation().setProducer(SAMBox.PRODUCER); - getDocumentInformation().setModificationDate(Calendar.getInstance()); + onBeforeWrite.onBeforeWrite(); + writer.write(this); } + finally + { + IOUtils.close(this); + } + } + private void subsetFonts() + { for (Subsettable font : fontsToSubset) { try @@ -555,20 +576,57 @@ private void writeTo(CountingWritableByteChannel output, StandardSecurity securi } } fontsToSubset.clear(); - EncryptionContext encryptionContext = ofNullable(security).map(EncryptionContext::new) - .orElse(null); - generateFileIdentifier(output.toString().getBytes(StandardCharsets.ISO_8859_1), - encryptionContext); - try (PDDocumentWriter writer = new PDDocumentWriter(output, encryptionContext, - preSaveCOSTransformer, options)) + } + + private void updateMetadata(WriteOption[] options) + { + if (Arrays.stream(options) + .noneMatch(i -> i == WriteOption.NO_METADATA_PRODUCER_MODIFIED_DATE_UPDATE)) { - onBeforeWrite.onBeforeWrite(); - writer.write(this); + // update producer and last modification date only if the write option doesn't state otherwise + getDocumentInformation().setProducer(SAMBox.PRODUCER); + getDocumentInformation().setModificationDate(Calendar.getInstance()); } - finally + if (Arrays.stream(options).anyMatch(o -> o == 
WriteOption.UPSERT_DOCUMENT_METADATA_STREAM)) { - IOUtils.close(this); + requireMinVersion(V1_4); + var metadataStream = new PDMetadata(); + try (var metadataOutputStream = new BufferedOutputStream( + metadataStream.getCOSObject().createUnfilteredStream())) + { + new XmpSerializer().serialize( + getDocumentInformation().toXMPMetadata(getOrCreateXmpMetadata(), + getVersion()), metadataOutputStream, true); + getDocumentCatalog().setMetadata(metadataStream); + } + catch (IOException | TransformerException e) + { + LOG.warn("Unable to set xmp document metadata", e); + } + catch (XmpParsingException e) + { + LOG.warn("Unable to parse existing document level xmp metadata", e); + } + } + } + + private XMPMetadata getOrCreateXmpMetadata() throws XmpParsingException, IOException + { + var metadata = getDocumentCatalog().getMetadata(); + if (nonNull(metadata)) + { + try + { + var parser = new DomXmpParser(); + parser.setStrictParsing(false); + return parser.parse(new BufferedInputStream(metadata.createInputStream())); + } + finally + { + metadata.getCOSObject().unDecode(); + } } + return XMPMetadata.createXMPMetadata(); } /** diff --git a/src/main/java/org/sejda/sambox/pdmodel/PDDocumentInformation.java b/src/main/java/org/sejda/sambox/pdmodel/PDDocumentInformation.java index 12524be54..fb8538477 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/PDDocumentInformation.java +++ b/src/main/java/org/sejda/sambox/pdmodel/PDDocumentInformation.java @@ -16,22 +16,28 @@ */ package org.sejda.sambox.pdmodel; -import org.sejda.sambox.cos.COSDictionary; -import org.sejda.sambox.cos.COSName; -import org.sejda.sambox.pdmodel.common.PDDictionaryWrapper; +import static java.util.Optional.ofNullable; import java.util.Calendar; +import java.util.GregorianCalendar; import java.util.Set; import java.util.TreeSet; +import java.util.UUID; + +import org.apache.xmpbox.XMPMetadata; +import org.apache.xmpbox.schema.AdobePDFSchema; +import org.apache.xmpbox.schema.DublinCoreSchema; +import 
org.apache.xmpbox.schema.XMPBasicSchema; +import org.sejda.sambox.cos.COSDictionary; +import org.sejda.sambox.cos.COSName; +import org.sejda.sambox.pdmodel.common.PDDictionaryWrapper; /** - * This is the document metadata. Each getXXX method will return the entry if - * it exists or null if it does not exist. If you pass in null for the setXXX - * method then it will clear the value. + * This is the document metadata. Each getXXX method will return the entry if it exists or null if + * it does not exist. If you pass in null for the setXXX method then it will clear the value. * * @author Ben Litchfield * @author Gerardo Ortiz - * */ public class PDDocumentInformation extends PDDictionaryWrapper { @@ -42,7 +48,7 @@ public PDDocumentInformation() /** * Creates a new instance with a given COS dictionary. - * + * * @param dictionary the dictionary */ public PDDocumentInformation(COSDictionary dictionary) @@ -53,17 +59,16 @@ public PDDocumentInformation(COSDictionary dictionary) /** * Return the properties String value. *

- * Allows to retrieve the - * low level date for validation purposes. - *

- * + * Allows to retrieve the low level date for validation purposes. + *

+ * * @param propertyKey the dictionaries key * @return the properties value */ - public Object getPropertyStringValue(String propertyKey) - { + public Object getPropertyStringValue(String propertyKey) + { return getCOSObject().getString(propertyKey); - } + } /** * This will get the title of the document. This will return null if no title exists. @@ -80,7 +85,7 @@ public String getTitle() * * @param title The new title for the document. */ - public void setTitle( String title ) + public void setTitle(String title) { getCOSObject().setString(COSName.TITLE, title); } @@ -100,7 +105,7 @@ public String getAuthor() * * @param author The new author for the document. */ - public void setAuthor( String author ) + public void setAuthor(String author) { getCOSObject().setString(COSName.AUTHOR, author); } @@ -120,7 +125,7 @@ public String getSubject() * * @param subject The new subject for the document. */ - public void setSubject( String subject ) + public void setSubject(String subject) { getCOSObject().setString(COSName.SUBJECT, subject); } @@ -140,7 +145,7 @@ public String getKeywords() * * @param keywords The new keywords for the document. */ - public void setKeywords( String keywords ) + public void setKeywords(String keywords) { getCOSObject().setString(COSName.KEYWORDS, keywords); } @@ -160,7 +165,7 @@ public String getCreator() * * @param creator The new creator for the document. */ - public void setCreator( String creator ) + public void setCreator(String creator) { getCOSObject().setString(COSName.CREATOR, creator); } @@ -180,13 +185,14 @@ public String getProducer() * * @param producer The new producer for the document. */ - public void setProducer( String producer ) + public void setProducer(String producer) { getCOSObject().setString(COSName.PRODUCER, producer); } /** - * This will get the creation date of the document. This will return null if no creation date exists. + * This will get the creation date of the document. 
This will return null if no creation date + * exists. * * @return The creation date of the document. */ @@ -200,13 +206,14 @@ public Calendar getCreationDate() * * @param date The new creation date for the document. */ - public void setCreationDate( Calendar date ) + public void setCreationDate(Calendar date) { getCOSObject().setDate(COSName.CREATION_DATE, date); } /** - * This will get the modification date of the document. This will return null if no modification date exists. + * This will get the modification date of the document. This will return null if no + * modification date exists. * * @return The modification date of the document. */ @@ -220,14 +227,13 @@ public Calendar getModificationDate() * * @param date The new modification date for the document. */ - public void setModificationDate( Calendar date ) + public void setModificationDate(Calendar date) { getCOSObject().setDate(COSName.MOD_DATE, date); } /** - * This will get the trapped value for the document. - * This will return null if one is not found. + * This will get the trapped value for the document. This will return null if one is not found. * * @return The trapped value for the document. */ @@ -253,11 +259,10 @@ public Set getMetadataKeys() } /** - * This will get the value of a custom metadata getCOSObject()rmation field for the document. This will return null - * if one is not found. + * This will get the value of a custom metadata information field for the document. + * This will return null if one is not found. * * @param fieldName Name of custom metadata field from pdf document. - * * @return String Value of metadata field */ public String getCustomMetadataValue(String fieldName) @@ -268,31 +273,28 @@ public String getCustomMetadataValue(String fieldName) /** * Set the custom metadata value. * - * @param fieldName The name of the custom metadata field. + * @param fieldName The name of the custom metadata field. * @param fieldValue The value to the custom metadata field.
*/ - public void setCustomMetadataValue( String fieldName, String fieldValue ) + public void setCustomMetadataValue(String fieldName, String fieldValue) { getCOSObject().setString(fieldName, fieldValue); } - + public void removeMetadataField(String fieldName) { getCOSObject().removeItem(COSName.getPDFName(fieldName)); } /** - * This will set the trapped of the document. This will be - * 'True', 'False', or 'Unknown'. + * This will set the trapped of the document. This will be 'True', 'False', or 'Unknown'. * * @param value The new trapped value for the document. */ - public void setTrapped( String value ) + public void setTrapped(String value) { - if( value != null && - !value.equals( "True" ) && - !value.equals( "False" ) && - !value.equals( "Unknown" ) ) + if (value != null && !value.equals("True") && !value.equals("False") && !value.equals( + "Unknown")) { throw new IllegalArgumentException( "Valid values for trapped are " + "'True', 'False', or 'Unknown'"); @@ -300,4 +302,42 @@ public void setTrapped( String value ) getCOSObject().setName(COSName.TRAPPED, value); } + + /** + * @param metadata the metadata to update. If null a new empty {@link XMPMetadata} is created. + * @return an updated version of the input metadata where all the properties corresponding to + * the info dictionary have their values updated with the current value of the info dictionary.
+ */ + public XMPMetadata toXMPMetadata(XMPMetadata metadata, String pdfVersion) + { + metadata = ofNullable(metadata).orElseGet(XMPMetadata::createXMPMetadata); + AdobePDFSchema pdfSchema = ofNullable(metadata.getAdobePDFSchema()).orElseGet( + metadata::createAndAddAdobePDFSchema); + pdfSchema.setPDFVersion(pdfVersion); + ofNullable(getKeywords()).ifPresent(pdfSchema::setKeywords); + ofNullable(getProducer()).map(p -> pdfSchema.getMetadata().getTypeMapping() + .createAgentName(pdfSchema.getNamespace(), pdfSchema.getPrefix(), "Producer", p)) + .ifPresent(pdfSchema::addProperty); + pdfSchema.addProperty(pdfSchema.getMetadata().getTypeMapping() + .createBoolean(pdfSchema.getNamespace(), pdfSchema.getPrefix(), "Trapped", + getCOSObject().getBoolean(COSName.TRAPPED, false))); + + XMPBasicSchema basicSchema = ofNullable(metadata.getXMPBasicSchema()).orElseGet( + metadata::createAndAddXMPBasicSchema); + basicSchema.addIdentifier(UUID.randomUUID().toString()); + basicSchema.setMetadataDate(new GregorianCalendar()); + ofNullable(getModificationDate()).ifPresent(basicSchema::setModifyDate); + ofNullable(getCreator()).ifPresent(basicSchema::setCreatorTool); + ofNullable(getCreationDate()).ifPresent(basicSchema::setCreateDate); + + DublinCoreSchema dcSchema = ofNullable(metadata.getDublinCoreSchema()).orElseGet( + metadata::createAndAddDublinCoreSchema); + dcSchema.setFormat("application/pdf"); + ofNullable(getTitle()).ifPresent(dcSchema::setTitle); + ofNullable(getSubject()).ifPresent(dcSchema::setDescription); + ofNullable(getAuthor()).filter( + a -> ofNullable(dcSchema.getCreators()).map(l -> !l.contains(a)).orElse(true)) + .ifPresent(dcSchema::addCreator); + return metadata; + } } diff --git a/src/test/java/org/sejda/sambox/pdmodel/TestPDDocument.java b/src/test/java/org/sejda/sambox/pdmodel/TestPDDocument.java index 9070b5254..f3a48cd90 100644 --- a/src/test/java/org/sejda/sambox/pdmodel/TestPDDocument.java +++ b/src/test/java/org/sejda/sambox/pdmodel/TestPDDocument.java 
@@ -17,6 +17,7 @@ package org.sejda.sambox.pdmodel; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -30,10 +31,14 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; import java.util.Arrays; import java.util.Locale; import org.apache.fontbox.ttf.TrueTypeFont; +import org.apache.xmpbox.XMPMetadata; +import org.apache.xmpbox.xml.DomXmpParser; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -42,6 +47,7 @@ import org.sejda.sambox.SAMBox; import org.sejda.sambox.cos.COSDictionary; import org.sejda.sambox.cos.COSName; +import org.sejda.sambox.cos.COSStream; import org.sejda.sambox.input.PDFParser; import org.sejda.sambox.output.PreSaveCOSTransformer; import org.sejda.sambox.output.WriteOption; @@ -276,6 +282,139 @@ public void testWriteNoMetadata(@TempDir Path tmp) throws IOException } } + @Test + public void testWriteWithXMPMetadata(@TempDir Path tmp) throws Exception + { + var output = Files.createTempFile(tmp, "", ".pdf").toFile(); + try (PDDocument document = PDFParser.parse(SeekableSources.inMemorySeekableSourceFrom( + getClass().getResourceAsStream("/sambox/simple_test.pdf")))) + { + document.setVersion(SpecVersionUtils.V1_7); + document.getDocumentInformation().setTitle("Chick Norris"); + document.writeTo(output, WriteOption.UPSERT_DOCUMENT_METADATA_STREAM); + } + try (var outputDoc = PDFParser.parse(SeekableSources.seekableSourceFrom(output))) + { + try (var metadata = outputDoc.getDocumentCatalog().getCOSObject() + .getDictionaryObject(COSName.METADATA, COSStream.class)) + { + assertNotNull(metadata); + DomXmpParser parser = new 
DomXmpParser(); + parser.setStrictParsing(false); + XMPMetadata meta = parser.parse(metadata.getUnfilteredStream()); + var dublinCoreSchema = meta.getDublinCoreSchema(); + assertNotNull(dublinCoreSchema); + assertEquals("Chick Norris", dublinCoreSchema.getTitle()); + var basicSchema = meta.getXMPBasicSchema(); + assertNotNull(basicSchema); + assertNotNull(basicSchema.getMetadataDate()); + assertNotNull(basicSchema.getIdentifiers()); + assertEquals(1, basicSchema.getIdentifiers().size()); + var adobeSchema = meta.getAdobePDFSchema(); + assertNotNull(adobeSchema); + assertEquals(SpecVersionUtils.V1_7, adobeSchema.getPDFVersion()); + assertNotNull(adobeSchema.getProducer()); + } + } + } + + @Test + public void metadataNotCompressed(@TempDir Path tmp) throws Exception + { + var output = Files.createTempFile(tmp, "", ".pdf").toFile(); + try (PDDocument document = PDFParser.parse(SeekableSources.inMemorySeekableSourceFrom( + getClass().getResourceAsStream("/sambox/simple_test.pdf")))) + { + document.setVersion(SpecVersionUtils.V1_7); + document.getDocumentInformation().setTitle("Chick Norris"); + document.writeTo(output, WriteOption.UPSERT_DOCUMENT_METADATA_STREAM, + WriteOption.COMPRESS_STREAMS); + } + try (var outputDoc = PDFParser.parse(SeekableSources.seekableSourceFrom(output))) + { + var metadata = outputDoc.getDocumentCatalog().getCOSObject() + .getDictionaryObject(COSName.METADATA, COSStream.class); + assertNull(metadata.getCOSName(COSName.FILTER)); + } + } + + @Test + public void testWriteWithExistingXMPMetadata(@TempDir Path tmp) throws Exception + { + var output = Files.createTempFile(tmp, "", ".pdf").toFile(); + try (PDDocument document = PDFParser.parse(SeekableSources.inMemorySeekableSourceFrom( + getClass().getResourceAsStream("/sambox/simple_test_with_meta.pdf")))) + { + document.getDocumentInformation().setTitle("Steven Segal"); + document.writeTo(output, WriteOption.UPSERT_DOCUMENT_METADATA_STREAM); + } + try (var outputDoc = 
PDFParser.parse(SeekableSources.seekableSourceFrom(output))) + { + try (var metadata = outputDoc.getDocumentCatalog().getCOSObject() + .getDictionaryObject(COSName.METADATA, COSStream.class)) + { + assertNotNull(metadata); + var parser = new DomXmpParser(); + parser.setStrictParsing(false); + XMPMetadata meta = parser.parse(metadata.getUnfilteredStream()); + var dublinCoreSchema = meta.getDublinCoreSchema(); + assertNotNull(dublinCoreSchema); + assertEquals("Steven Segal", dublinCoreSchema.getTitle()); + } + } + } + + @Test + // malformed xmp are not fixed nor replaced. We currently leave them as they are; this can be + // changed in the future with some more advanced logic even though it seems hard to try to catch + // all possible errors. Maybe we should look at Acrobat and see what it does + public void testWriteWithExistingMalformedXMPMetadata(@TempDir Path tmp) throws Exception + { + var output = Files.createTempFile(tmp, "", ".pdf").toFile(); + try (PDDocument document = PDFParser.parse(SeekableSources.inMemorySeekableSourceFrom( + getClass().getResourceAsStream("/sambox/xmp_metadata_missing_rdf_namespace.pdf")))) + { + document.writeTo(output, WriteOption.UPSERT_DOCUMENT_METADATA_STREAM); + } + try (var outputDoc = PDFParser.parse(SeekableSources.seekableSourceFrom(output))) + { + try (var metadata = outputDoc.getDocumentCatalog().getCOSObject() + .getDictionaryObject(COSName.METADATA, COSStream.class)) + { + assertNotNull(metadata); + } + } + } + + @Test + public void testWriteMetadataNoProducerModifyDate(@TempDir Path tmp) throws Exception + { + var output = Files.createTempFile(tmp, "", ".pdf").toFile(); + try (PDDocument document = PDFParser.parse(SeekableSources.inMemorySeekableSourceFrom( + getClass().getResourceAsStream("/sambox/simple_test.pdf")))) + { + document.writeTo(output, WriteOption.UPSERT_DOCUMENT_METADATA_STREAM, + WriteOption.NO_METADATA_PRODUCER_MODIFIED_DATE_UPDATE); + } + try (var outputDoc =
PDFParser.parse(SeekableSources.seekableSourceFrom(output))) + { + try (var metadata = outputDoc.getDocumentCatalog().getCOSObject() + .getDictionaryObject(COSName.METADATA, COSStream.class)) + { + assertNotNull(metadata); + var parser = new DomXmpParser(); + parser.setStrictParsing(false); + XMPMetadata meta = parser.parse(metadata.getUnfilteredStream()); + var basicSchema = meta.getXMPBasicSchema(); + assertTrue(ZonedDateTime.of(2020, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC).plusYears(1) + .toInstant().isAfter(basicSchema.getModifyDate().toInstant())); + var adobeSchema = meta.getAdobePDFSchema(); + assertEquals("SAMBox 1.0.64.RELEASE-SNAPSHOT (www.sejda.org)", + adobeSchema.getProducer()); + } + } + } + @Test public void testPreSaveTransformer(@TempDir Path tmp) throws IOException { diff --git a/src/test/resources/sambox/simple_test_with_meta.pdf b/src/test/resources/sambox/simple_test_with_meta.pdf new file mode 100644 index 000000000..fb13eb0cb Binary files /dev/null and b/src/test/resources/sambox/simple_test_with_meta.pdf differ diff --git a/src/test/resources/sambox/xmp_metadata_missing_rdf_namespace.pdf b/src/test/resources/sambox/xmp_metadata_missing_rdf_namespace.pdf new file mode 100644 index 000000000..74ca11501 Binary files /dev/null and b/src/test/resources/sambox/xmp_metadata_missing_rdf_namespace.pdf differ