From 93d745742ddc06871ed7526e324f01506c7020e2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 3 Oct 2024 21:50:18 -0700 Subject: [PATCH] Docx reader: reset lists after headers in same list numId. Headings in docx, even ones that do not have a visible number, can have a numId, and in odd cases can even share a numId with a list that continues after the header. In this case the list numbering should be reset by the header. To accomplish this, we add a Heading constructor to BodyPart and include on it all the information list items have. Closes #10258. --- src/Text/Pandoc/Readers/Docx.hs | 40 +++++++++++++++++++-------- src/Text/Pandoc/Readers/Docx/Parse.hs | 13 +++++++-- 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 4130a749f7d6..689ee9f7cd0c 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -669,6 +669,34 @@ normalizeToClassName = T.map go . fromStyleName | otherwise = c bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks +bodyPartToBlocks (Heading n style pPr numId lvl mblvlInfo parparts) = do + ils <- local (\s-> s{docxInHeaderBlock=True}) + (smushInlines <$> mapM parPartToInlines parparts) + let classes = map normalizeToClassName . delete style + $ getStyleNames (pStyle pPr) + hasNumbering <- gets docxNumberedHeadings + let addNum = if hasNumbering && not (numbered pPr) + then (++ ["unnumbered"]) + else id + if T.null numId + then pure () + else do + -- We check whether this current numId has previously been used, + -- since Docx expects us to pick up where we left off. + listState <- gets docxListState + let start = case M.lookup (numId, lvl) listState of + Nothing -> case mblvlInfo of + Nothing -> 1 + Just (Level _ _ _ z) -> fromMaybe 1 z + Just z -> z + 1 + modify $ \st -> st{ docxListState = + -- expire all the continuation data for lists of level > this one: + -- a new level 1 list item resets continuation for level 2+ + -- see #10258 + let notExpired (_, lvl') _ = lvl' <= lvl + in M.insert (numId, lvl) start + (M.filterWithKey notExpired listState) } + makeHeaderAnchor $ headerWith ("", addNum classes, []) n ils bodyPartToBlocks (Paragraph pPr parparts) | Just True <- pBidi pPr = do let pPr' = pPr { pBidi = Nothing } @@ -681,18 +709,6 @@ bodyPartToBlocks (Paragraph pPr parparts) codeBlock $ T.concat $ map parPartToText parparts - | Just (style, n) <- pHeading pPr = do - ils <- local (\s-> s{docxInHeaderBlock=True}) - (smushInlines <$> mapM parPartToInlines parparts) - let classes = map normalizeToClassName . delete style - $ getStyleNames (pStyle pPr) - - hasNumbering <- gets docxNumberedHeadings - let addNum = if hasNumbering && not (numbered pPr) - then (++ ["unnumbered"]) - else id - makeHeaderAnchor $ - headerWith ("", addNum classes, []) n ils | otherwise = do ils <- trimSps . smushInlines <$> mapM parPartToInlines parparts prevParaIls <- gets docxPrevPara diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 3c493022ee7c..ae87fec7a5db 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -283,6 +283,8 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] data BodyPart = Paragraph ParagraphStyle [ParPart] + | Heading Int ParaStyleName ParagraphStyle T.Text T.Text (Maybe Level) + [ParPart] | ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart] | Tbl T.Text TblGrid TblLook [Row] | Captioned ParagraphStyle [ParPart] BodyPart @@ -791,13 +793,15 @@ elemToBodyPart ns element elemToBodyPart ns element | isElem ns "w" "p" element , Just (numId, lvl) <- getNumInfo ns element = do + lvlInfo <- lookupLevel numId lvl <$> asks envNumbering parstyle <- elemToParagraphStyle ns element <$> asks envParStyles <*> asks envNumbering parparts <- mconcat <$> mapD (elemToParPart ns) (elChildren element) case pHeading parstyle of Nothing -> mkListItem parstyle numId lvl parparts - Just _ -> return $ Paragraph parstyle parparts + Just (parstylename, lev) + -> return $ Heading lev parstylename parstyle numId lvl lvlInfo parparts elemToBodyPart ns element | isElem ns "w" "p" element , [Elem ppr] <- elContent element @@ -836,8 +840,11 @@ elemToBodyPart ns element case pHeading parstyle of Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do mkListItem parstyle numId lvl parparts - _ -> return $ Paragraph parstyle parparts - + Just (parstylename, lev) -> do + let (numId, lvl) = fromMaybe ("","") $ pNumInfo parstyle + lvlInfo <- lookupLevel numId lvl <$> asks envNumbering + return $ Heading lev parstylename parstyle numId lvl lvlInfo parparts + Nothing -> return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do let tblProperties = findChildByName ns "w" "tblPr" element