From 3a96df4c93bb30edd9021fd91f6d245e1a6c481f Mon Sep 17 00:00:00 2001 From: Stephan Meijer Date: Mon, 6 Nov 2023 19:21:14 +0100 Subject: [PATCH] #8986 different behaviors based on type of meta --- src/Text/Pandoc/Readers/Docx.hs | 64 ++++++---- test/Tests/Readers/Docx.hs | 2 +- test/docx/metadata_after_normal.native | 169 ++++++++++++++++++++++++- 3 files changed, 204 insertions(+), 31 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 8aea800c923fa..cd8626c379fc1 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -169,36 +169,54 @@ spansToKeep = [] divsToKeep :: [ParaStyleName] divsToKeep = ["Definition", "Definition Term"] +-- | Remove the first element satisfying the predicate, returning it (if any) together with the remaining elements in their original order. +extractFirst :: (a -> Bool) -> [a] -> (Maybe a, [a]) +extractFirst _ [] = (Nothing, []) +extractFirst p (x:xs) + | p x = (Just x, xs) + | otherwise = let (found, rest) = extractFirst p xs + in (found, x : rest) + +-- | Meta styles for which every matching paragraph is extracted. +multiMetaStyles :: M.Map ParaStyleName T.Text +multiMetaStyles = M.fromList [ ("Author", "author") ] + +-- | Meta styles for which only the first matching paragraph is extracted. 
+singleMetaStyles :: M.Map ParaStyleName T.Text +singleMetaStyles = M.fromList [ ("Title", "title") + , ("Subtitle", "subtitle") + , ("Date", "date") + , ("Abstract", "abstract")] + metaStyles :: M.Map ParaStyleName T.Text -metaStyles = M.fromList [ ("Title", "title") - , ("Subtitle", "subtitle") - , ("Author", "author") - , ("Date", "date") - , ("Abstract", "abstract")] +metaStyles = M.union singleMetaStyles multiMetaStyles +-- | Split body parts into metadata paragraphs (every multi-meta paragraph, then the first paragraph of each single-meta style) and the remaining body parts. sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart]) -sepBodyParts bps = (metaWithoutEmpty, nonMetaFirst ++ emptyPars ++ nonMetaLast) +sepBodyParts bps = (multiMetas ++ singleMetas, rest) where - (nonMetaFirst, rest) = break isMetaOrEmpty bps - (meta, nonMetaLast) = span isMetaOrEmpty rest - isMetaOrEmpty bp = isMetaPar bp || isEmptyPar bp + -- pull out every paragraph styled with one of the multiMetaStyles + (multiMetas, restWithoutMulti) = partition isMultiMetaPar bps - (metaWithoutEmpty, emptyPars) = partition (not . isEmptyPar) meta + -- for each style in singleMetaStyles, move its first matching paragraph into singleMetas; all other parts stay in rest + (singleMetas, rest) = foldr extractSingle ([], restWithoutMulti) (M.keys singleMetaStyles) -isMetaPar :: BodyPart -> Bool -isMetaPar (Paragraph pPr _) = - not $ null $ intersect (getStyleNames $ pStyle pPr) (M.keys metaStyles) -isMetaPar _ = False + extractSingle :: ParaStyleName -> ([BodyPart], [BodyPart]) -> ([BodyPart], [BodyPart]) + extractSingle styleName (accSingleMetas, remainingBPs) = + let (found, others) = extractFirst (isSingleMetaPar styleName) remainingBPs + in (maybe accSingleMetas (: accSingleMetas) found, others) -isEmptyPar :: BodyPart -> Bool -isEmptyPar (Paragraph _ parParts) = - all isEmptyParPart parParts - where - isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems - isEmptyParPart _ = False - isEmptyElem (TextRun s) = trim s == "" - isEmptyElem _ = True -isEmptyPar _ = False + isSingleMetaPar :: 
ParaStyleName -> BodyPart -> Bool + isSingleMetaPar styleName (Paragraph pPr _) = + styleName `elem` getStyleNames (pStyle pPr) + isSingleMetaPar _ _ = False + + +-- | True for a paragraph with at least one style name listed in multiMetaStyles. +isMultiMetaPar :: BodyPart -> Bool +isMultiMetaPar (Paragraph pPr _) = + not $ null $ intersect (getStyleNames $ pStyle pPr) (M.keys multiMetaStyles) +isMultiMetaPar _ = False bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map T.Text MetaValue) bodyPartsToMeta' [] = return M.empty diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index 8d2576f63b679..94a0438190c55 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -507,7 +507,7 @@ tests = [ testGroup "document" "docx/metadata.docx" "docx/metadata.native" , testCompareWithOpts def{readerStandalone=True} - "stop recording metadata with normal text" + "recording metadata after normal text only if author" "docx/metadata_after_normal.docx" "docx/metadata_after_normal.native" ] diff --git a/test/docx/metadata_after_normal.native b/test/docx/metadata_after_normal.native index f0e31f8da2da4..560b31a548432 100644 --- a/test/docx/metadata_after_normal.native +++ b/test/docx/metadata_after_normal.native @@ -1,7 +1,162 @@ -Pandoc (Meta {unMeta = fromList [("abstract",MetaInlines [Str "This",Space,Str "is",Space,Str "a",Space,Str "test",Space,Str "of",Space,Str "how",Space,Str "this",Space,Str "all",Space,Str "works.",Space,Str "I\8217ve",Space,Str "skipped",Space,Str "lines",Space,Str "here,",Space,Str "which",Space,Str "pandoc",Space,Str "doesn\8217t",Space,Str "do,",Space,Str "but",Space,Str "which",Space,Str "shouldn\8217t",Space,Str "make",Space,Str "a",Space,Str "difference."]),("author",MetaList [MetaInlines [Str "Mary",Space,Str "Ann",Space,Str "Evans"],MetaInlines [Str "Aurore",Space,Str "Dupin"]]),("date",MetaInlines [Str "July",Space,Str "28,",Space,Str "2014"]),("title",MetaInlines [Str "This",Space,Str "Is",Space,Str "the",Space,Str "Title"])]}) -[Para [Str "And",Space,Str "now",Space,Str 
"this",Space,Str "is",Space,Str "normal",Space,Str "text."] -,Para [Str "This",Space,Str "Is",Space,Str "the",Space,Str "Title"] -,Para [Str "Mary",Space,Str "Ann",Space,Str "Evans"] -,Para [Str "Aurore",Space,Str "Dupin"] -,Para [Str "July",Space,Str "28,",Space,Str "2014"] -,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "test",Space,Str "of",Space,Str "how",Space,Str "this",Space,Str "all",Space,Str "works.",Space,Str "I\8217ve",Space,Str "skipped",Space,Str "lines",Space,Str "here,",Space,Str "which",Space,Str "pandoc",Space,Str "doesn\8217t",Space,Str "do,",Space,Str "but",Space,Str "which",Space,Str "shouldn\8217t",Space,Str "make",Space,Str "a",Space,Str "difference."]] +Pandoc + Meta + { unMeta = + fromList + [ ( "abstract" + , MetaInlines + [ Str "This" + , Space + , Str "is" + , Space + , Str "a" + , Space + , Str "test" + , Space + , Str "of" + , Space + , Str "how" + , Space + , Str "this" + , Space + , Str "all" + , Space + , Str "works." + , Space + , Str "I\8217ve" + , Space + , Str "skipped" + , Space + , Str "lines" + , Space + , Str "here," + , Space + , Str "which" + , Space + , Str "pandoc" + , Space + , Str "doesn\8217t" + , Space + , Str "do," + , Space + , Str "but" + , Space + , Str "which" + , Space + , Str "shouldn\8217t" + , Space + , Str "make" + , Space + , Str "a" + , Space + , Str "difference." 
+ ] + ) + , ( "author" + , MetaList + [ MetaInlines + [ Str "Mary" + , Space + , Str "Ann" + , Space + , Str "Evans" + ] + , MetaInlines [ Str "Aurore" , Space , Str "Dupin" ] + , MetaInlines + [ Str "Mary" + , Space + , Str "Ann" + , Space + , Str "Evans" + ] + , MetaInlines [ Str "Aurore" , Space , Str "Dupin" ] + ] + ) + , ( "date" + , MetaInlines + [ Str "July" , Space , Str "28," , Space , Str "2014" ] + ) + , ( "title" + , MetaInlines + [ Str "This" + , Space + , Str "Is" + , Space + , Str "the" + , Space + , Str "Title" + ] + ) + ] + } + [ Para + [ Str "And" + , Space + , Str "now" + , Space + , Str "this" + , Space + , Str "is" + , Space + , Str "normal" + , Space + , Str "text." + ] + , Para + [ Str "This" + , Space + , Str "Is" + , Space + , Str "the" + , Space + , Str "Title" + ] + , Para + [ Str "July" , Space , Str "28," , Space , Str "2014" ] + , Para + [ Str "This" + , Space + , Str "is" + , Space + , Str "a" + , Space + , Str "test" + , Space + , Str "of" + , Space + , Str "how" + , Space + , Str "this" + , Space + , Str "all" + , Space + , Str "works." + , Space + , Str "I\8217ve" + , Space + , Str "skipped" + , Space + , Str "lines" + , Space + , Str "here," + , Space + , Str "which" + , Space + , Str "pandoc" + , Space + , Str "doesn\8217t" + , Space + , Str "do," + , Space + , Str "but" + , Space + , Str "which" + , Space + , Str "shouldn\8217t" + , Space + , Str "make" + , Space + , Str "a" + , Space + , Str "difference." + ] + ]