Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#8986 Image before Title still respects Title metadata #9162

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 45 additions & 21 deletions src/Text/Pandoc/Readers/Docx.hs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ import Control.Monad.State.Strict
import Data.Bifunctor (bimap, first)
import qualified Data.ByteString.Lazy as B
import Data.Default (Default)
import Data.List (delete, intersect, foldl')
import Data.List (partition, delete, intersect, foldl')
import Data.Char (isSpace)
import qualified Data.Map as M
import qualified Data.Text as T
Expand Down Expand Up @@ -169,30 +169,54 @@ spansToKeep = []
divsToKeep :: [ParaStyleName]
divsToKeep = ["Definition", "Definition Term"]

-- | Paragraph style names that are treated as metadata, each paired with
-- the text it maps to (presumably the metadata field name -- confirm
-- against the code that consumes this map).
metaStyles :: M.Map ParaStyleName T.Text
metaStyles =
  M.fromList
    [ ("Title", "title")
    , ("Subtitle", "subtitle")
    , ("Author", "author")
    , ("Date", "date")
    , ("Abstract", "abstract")
    ]
-- | Meta styles for which every matching paragraph is collected, rather
-- than only the first one. Currently this is just the @Author@ style.
multiMetaStyles :: M.Map ParaStyleName T.Text
multiMetaStyles = M.singleton "Author" "author"

-- | Split the body parts into the longest leading run of metadata or
-- empty paragraphs and everything from the first other body part onward.
sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart])
sepBodyParts = span isLeadingMeta
  where
    -- A body part stays in the leading run while it is either a
    -- metadata paragraph or an empty paragraph.
    isLeadingMeta part = isMetaPar part || isEmptyPar part
-- | Meta styles for which only the first matching paragraph is kept as
-- metadata. The order of the entries below is irrelevant: the map is
-- keyed by style name.
singleMetaStyles :: M.Map ParaStyleName T.Text
singleMetaStyles =
  M.fromList
    [ ("Title", "title")
    , ("Subtitle", "subtitle")
    , ("Date", "date")
    , ("Abstract", "abstract")
    ]

-- | True when the body part is a 'Paragraph' and at least one of its
-- style names is a key of 'metaStyles'. Every other body part is
-- rejected.
isMetaPar :: BodyPart -> Bool
isMetaPar bodyPart =
  case bodyPart of
    Paragraph props _ ->
      any (`elem` metaStyleNames) (getStyleNames (pStyle props))
    _ -> False
  where
    metaStyleNames = M.keys metaStyles
-- | All paragraph styles treated as metadata: the single-instance styles
-- combined with the multi-instance ones. The combination is left-biased,
-- so an entry in 'singleMetaStyles' would win over a same-keyed entry in
-- 'multiMetaStyles'.
metaStyles :: M.Map ParaStyleName T.Text
metaStyles = singleMetaStyles <> multiMetaStyles

isEmptyPar :: BodyPart -> Bool
isEmptyPar (Paragraph _ parParts) =
all isEmptyParPart parParts
sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart])
sepBodyParts bps = (multiMetas ++ singleMetas, restWithoutRelevantMeta)
where
isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems
isEmptyParPart _ = False
isEmptyElem (TextRun s) = trim s == ""
isEmptyElem _ = True
isEmptyPar _ = False
-- pull every paragraph whose style is in multiMetaStyles out of bps
(multiMetas, restWithoutMulti) = partition isMultiMetaPar bps

-- for each style in singleMetaStyles, move its first matching paragraph into singleMetas; all other elements stay in the rest
(singleMetas, restWithoutRelevantMeta) = foldr extractSingle ([], restWithoutMulti) (M.keys singleMetaStyles)

extractSingle :: ParaStyleName -> ([BodyPart], [BodyPart]) -> ([BodyPart], [BodyPart])
extractSingle styleName (accSingleMetas, remainingBPs) =
let (found, rest) = extractFirst (isSingleMetaPar styleName) remainingBPs
in (maybeToList found ++ accSingleMetas, rest)

-- Turn an optional value into a list: empty for Nothing, a singleton
-- for Just.
-- NOTE(review): this has the same behaviour as Data.Maybe.maybeToList;
-- consider importing that instead of keeping a local copy.
maybeToList :: Maybe a -> [a]
maybeToList = maybe [] (: [])
Comment on lines +199 to +201
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there's a function like this in Data.Maybe


-- | True when the body part is a 'Paragraph' whose style names (as
-- reported by 'getStyleNames' on its paragraph style) include the given
-- style name. Any body part that is not a paragraph never matches.
isSingleMetaPar :: ParaStyleName -> BodyPart -> Bool
isSingleMetaPar styleName bodyPart =
  case bodyPart of
    Paragraph props _ -> styleName `elem` getStyleNames (pStyle props)
    _ -> False

-- | Remove the first element that satisfies the predicate from a list.
--
-- The result pairs the removed element ('Nothing' when no element
-- matches) with the remaining elements in their original order.
-- Elements after the first match are never tested.
extractFirst :: (a -> Bool) -> [a] -> (Maybe a, [a])
extractFirst predicate = go
  where
    go [] = (Nothing, [])
    go (candidate : others)
      | predicate candidate = (Just candidate, others)
      | otherwise = (match, candidate : leftover)
      where
        -- Lazy pattern binding: the tail is only searched on demand.
        (match, leftover) = go others


-- | True when the body part is a 'Paragraph' and at least one of its
-- style names is a key of 'multiMetaStyles'. Every other body part is
-- rejected.
isMultiMetaPar :: BodyPart -> Bool
isMultiMetaPar bodyPart =
  case bodyPart of
    Paragraph props _ ->
      any (`elem` multiStyleNames) (getStyleNames (pStyle props))
    _ -> False
  where
    multiStyleNames = M.keys multiMetaStyles

bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map T.Text MetaValue)
bodyPartsToMeta' [] = return M.empty
Expand Down
6 changes: 5 additions & 1 deletion test/Tests/Readers/Docx.hs
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,10 @@ tests = [ testGroup "document"
"i18n blocks (headers and blockquotes)"
"docx/i18n_blocks.docx"
"docx/i18n_blocks.native"
, testCompare
"Image before Title"
"docx/image-before-title.docx"
"docx/image-before-title.native"
, testCompare
"lists"
"docx/lists.docx"
Expand Down Expand Up @@ -503,7 +507,7 @@ tests = [ testGroup "document"
"docx/metadata.docx"
"docx/metadata.native"
, testCompareWithOpts def{readerStandalone=True}
"stop recording metadata with normal text"
"recording metadata after normal text only if author"
"docx/metadata_after_normal.docx"
"docx/metadata_after_normal.native"
]
Expand Down
234 changes: 182 additions & 52 deletions test/docx/0_level_headers.native
Original file line number Diff line number Diff line change
@@ -1,52 +1,182 @@
[Table ("",[],[]) (Caption Nothing
[])
[(AlignDefault,ColWidth 1.0)]
(TableHead ("",[],[])
[])
[(TableBody ("",[],[]) (RowHeadColumns 0)
[]
[Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Plain [Str "User\8217s",Space,Str "Guide"]]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Plain [Str "11",Space,Str "August",Space,Str "2017"]]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[]]
,Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[]]])]
(TableFoot ("",[],[])
[])
,Para [Str "CONTENTS"]
,Para [Strong [Str "Section",Space,Str "Page"]]
,Para [Str "FIGURES",Space,Link ("",[],[]) [Str "iv"] ("#figures","")]
,Para [Str "TABLES",Space,Link ("",[],[]) [Str "v"] ("#tables","")]
,Para [Str "SECTION",Space,Str "1",Space,Str "Introduction",Space,Link ("",[],[]) [Str "2"] ("#introduction","")]
,Header 1 ("figures",["Heading-0"],[]) [Str "FIGURES"]
,Para [Strong [Str "Figure",Space,Str "Page"]]
,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]]
,Header 1 ("tables",["Heading-0"],[]) [Str "TABLES"]
,Para [Strong [Str "Table",Space,Str "Page"]]
,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]]
,Header 1 ("introduction",[],[]) [Str "Introduction"]
,Para [Str "Nothing",Space,Str "to",Space,Str "introduce,",Space,Str "yet."]]
Pandoc
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Screenshot 2023-10-31 at 14 27 39

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure what to do with this. It is entirely logical this is being converted to a Title (meta) so I would prefer letting that meaning just sit in there and maybe change the example.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain more fully? Are you saying that the Title style is used for both the document title and the title of the table of contents section?

Meta
{ unMeta =
fromList [ ( "title" , MetaInlines [ Str "CONTENTS" ] ) ]
}
[ Table
( "" , [] , [] )
(Caption Nothing [])
[ ( AlignDefault , ColWidth 1.0 ) ]
(TableHead ( "" , [] , [] ) [])
[ TableBody
( "" , [] , [] )
(RowHeadColumns 0)
[]
[ Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[ Plain [ Str "User\8217s" , Space , Str "Guide" ] ]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[ Plain
[ Str "11"
, Space
, Str "August"
, Space
, Str "2017"
]
]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[]
]
, Row
( "" , [] , [] )
[ Cell
( "" , [] , [] )
AlignDefault
(RowSpan 1)
(ColSpan 1)
[]
]
]
]
(TableFoot ( "" , [] , [] ) [])
, Para [ Strong [ Str "Section" , Space , Str "Page" ] ]
, Para
[ Str "FIGURES"
, Space
, Link ( "" , [] , [] ) [ Str "iv" ] ( "#figures" , "" )
]
, Para
[ Str "TABLES"
, Space
, Link ( "" , [] , [] ) [ Str "v" ] ( "#tables" , "" )
]
, Para
[ Str "SECTION"
, Space
, Str "1"
, Space
, Str "Introduction"
, Space
, Link ( "" , [] , [] ) [ Str "2" ] ( "#introduction" , "" )
]
, Header
1 ( "figures" , [ "Heading-0" ] , [] ) [ Str "FIGURES" ]
, Para [ Strong [ Str "Figure" , Space , Str "Page" ] ]
, Para
[ Strong
[ Str "No"
, Space
, Str "table"
, Space
, Str "of"
, Space
, Str "figures"
, Space
, Str "entries"
, Space
, Str "found."
]
]
, Header
1 ( "tables" , [ "Heading-0" ] , [] ) [ Str "TABLES" ]
, Para [ Strong [ Str "Table" , Space , Str "Page" ] ]
, Para
[ Strong
[ Str "No"
, Space
, Str "table"
, Space
, Str "of"
, Space
, Str "figures"
, Space
, Str "entries"
, Space
, Str "found."
]
]
, Header
1 ( "introduction" , [] , [] ) [ Str "Introduction" ]
, Para
[ Str "Nothing"
, Space
, Str "to"
, Space
, Str "introduce,"
, Space
, Str "yet."
]
]
Binary file added test/docx/image-before-title.docx
Binary file not shown.
Loading