Skip to content

Commit

Permalink
Add support for the strict document format
Browse files Browse the repository at this point in the history
  • Loading branch information
mwilliamson committed Feb 18, 2024
1 parent cb8360c commit 3f91af0
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 2 deletions.
2 changes: 2 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

* Throw error when failing to find the body element in a document.

* Add support for the strict document format.

# 1.6.0

* Support merged paragraphs when revisions are tracked.
Expand Down
14 changes: 12 additions & 2 deletions src/main/java/org/zwobble/mammoth/internal/docx/OfficeXml.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,25 @@

public class OfficeXml {
// Table of XML namespace prefixes and the namespace URIs registered for them.
// The prefixes "w", "r", "wp", "a" and "pic" are each registered with two
// different URIs: one from the transitional format and one from the strict format.
// NOTE(review): how NamespacePrefixes handles a prefix that is put() more than
// once is not visible here — presumably it keeps every URI; confirm against the builder.
private static final NamespacePrefixes XML_NAMESPACES = NamespacePrefixes.builder()
// Transitional format
.put("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main")
.put("r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships")
.put("wp", "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")
.put("a", "http://schemas.openxmlformats.org/drawingml/2006/main")
.put("pic", "http://schemas.openxmlformats.org/drawingml/2006/picture")

// Strict format
.put("w", "http://purl.oclc.org/ooxml/wordprocessingml/main")
.put("r", "http://purl.oclc.org/ooxml/officeDocument/relationships")
.put("wp", "http://purl.oclc.org/ooxml/drawingml/wordprocessingDrawing")
.put("a", "http://purl.oclc.org/ooxml/drawingml/main")
.put("pic", "http://purl.oclc.org/ooxml/drawingml/picture")

// Common
.put("content-types", "http://schemas.openxmlformats.org/package/2006/content-types")
// NOTE(review): this "r" entry is identical to the one in the transitional group
// above — presumably a removed line left over from the diff view; confirm.
.put("r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships")
.put("relationships", "http://schemas.openxmlformats.org/package/2006/relationships")
// NOTE(review): "v" is registered twice in this group with the same URI — presumably
// one of the two is a removed line left over from the diff view; confirm.
.put("v", "urn:schemas-microsoft-com:vml")
.put("mc", "http://schemas.openxmlformats.org/markup-compatibility/2006")
.put("v", "urn:schemas-microsoft-com:vml")
.put("office-word", "urn:schemas-microsoft-com:office:word")
.build();

Expand Down
8 changes: 8 additions & 0 deletions src/test/java/org/zwobble/mammoth/tests/MammothTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,14 @@ public void canExtractRawTextFromStream() throws IOException {
isSuccess("Apple\n\nBanana\n\n"));
}

// Converts the strict-format fixture document to HTML and checks that the
// conversion succeeds with a single "Test" paragraph.
@Test
public void canReadStrictFormat() throws IOException {
    final String fixtureName = "strict-format.docx";
    final String expectedHtml = "<p>Test</p>";

    assertThat(convertToHtml(fixtureName), isSuccess(expectedHtml));
}

private Result<String> convertToHtml(String name) throws IOException {
File file = TestData.file(name);
return new DocumentConverter().convertToHtml(file);
Expand Down
Binary file added src/test/resources/test-data/strict-format.docx
Binary file not shown.

0 comments on commit 3f91af0

Please sign in to comment.