Skip to content

Commit

Permalink
Docx reader: reset lists after headers in same list numId.
Browse files Browse the repository at this point in the history
Headings in docx, even ones that do not have a visible number,
can have a numId, and in odd cases can even share a numId with
a list that continues after the heading. In this case the list
numbering should be reset by the heading.

To accomplish this, we add a Heading constructor to BodyPart that
carries the same numbering information as ListItem (numId, level,
and the optional Level definition).

Closes #10258.
  • Loading branch information
jgm committed Oct 4, 2024
1 parent a2dfda0 commit 93d7457
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 15 deletions.
40 changes: 28 additions & 12 deletions src/Text/Pandoc/Readers/Docx.hs
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,34 @@ normalizeToClassName = T.map go . fromStyleName
| otherwise = c

bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
-- Heading clause: build a header block and, when the heading carries a
-- non-empty numId, record it in the list-continuation state just as a
-- list item at that (numId, lvl) would be recorded.
bodyPartToBlocks (Heading n style pPr numId lvl mblvlInfo parparts) = do
  -- Inlines are converted with docxInHeaderBlock switched on.
  headingInlines <-
    local (\env -> env { docxInHeaderBlock = True }) $
      smushInlines <$> mapM parPartToInlines parparts
  numberedHeadings <- gets docxNumberedHeadings
  let -- Every style name except the heading style itself becomes a class.
      otherStyles = delete style (getStyleNames (pStyle pPr))
      baseClasses = map normalizeToClassName otherStyles
      -- If the document uses numbered headings but this paragraph is not
      -- numbered, tag it "unnumbered".
      headingClasses
        | numberedHeadings && not (numbered pPr) = baseClasses ++ ["unnumbered"]
        | otherwise = baseClasses
  if T.null numId
    then pure ()
    else do
      -- A numId that was seen before continues from its stored counter;
      -- otherwise we begin at the level's declared start value (default 1).
      seen <- gets docxListState
      let levelStart = case mblvlInfo of
            Just (Level _ _ _ mbStart) -> fromMaybe 1 mbStart
            Nothing -> 1
          counter = maybe levelStart (+ 1) (M.lookup (numId, lvl) seen)
          -- Keep only continuation data whose level is <= this one, so
          -- deeper levels start afresh after this heading (see #10258).
          -- NOTE(review): the comparison uses the Ord instance of the level
          -- type as stored in the state key; confirm that matches numeric
          -- order for every level value that can occur.
          atOrAbove (_, otherLvl) _ = otherLvl <= lvl
      modify $ \st ->
        st { docxListState =
               M.insert (numId, lvl) counter (M.filterWithKey atOrAbove seen) }
  makeHeaderAnchor $ headerWith ("", headingClasses, []) n headingInlines
bodyPartToBlocks (Paragraph pPr parparts)
| Just True <- pBidi pPr = do
let pPr' = pPr { pBidi = Nothing }
Expand All @@ -681,18 +709,6 @@ bodyPartToBlocks (Paragraph pPr parparts)
codeBlock $
T.concat $
map parPartToText parparts
| Just (style, n) <- pHeading pPr = do
ils <- local (\s-> s{docxInHeaderBlock=True})
(smushInlines <$> mapM parPartToInlines parparts)
let classes = map normalizeToClassName . delete style
$ getStyleNames (pStyle pPr)

hasNumbering <- gets docxNumberedHeadings
let addNum = if hasNumbering && not (numbered pPr)
then (++ ["unnumbered"])
else id
makeHeaderAnchor $
headerWith ("", addNum classes, []) n ils
| otherwise = do
ils <- trimSps . smushInlines <$> mapM parPartToInlines parparts
prevParaIls <- gets docxPrevPara
Expand Down
13 changes: 10 additions & 3 deletions src/Text/Pandoc/Readers/Docx/Parse.hs
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,8 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []


data BodyPart = Paragraph ParagraphStyle [ParPart]
| Heading Int ParaStyleName ParagraphStyle T.Text T.Text (Maybe Level)
[ParPart]
| ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart]
| Tbl T.Text TblGrid TblLook [Row]
| Captioned ParagraphStyle [ParPart] BodyPart
Expand Down Expand Up @@ -791,13 +793,15 @@ elemToBodyPart ns element
-- Clause for a "w":"p" element for which getNumInfo yields a
-- (numId, lvl) pair. Depending on whether the computed paragraph style
-- has a pHeading, the result is built via mkListItem or as a heading.
-- NOTE(review): this view is a rendered diff, so both the older
-- `Just _` branch and the newer `Just (parstylename, lev)` branch appear
-- below; presumably only the latter exists after the commit -- confirm
-- against the repository.
elemToBodyPart ns element
| isElem ns "w" "p" element
, Just (numId, lvl) <- getNumInfo ns element = do
-- Level definition for this numId/lvl, looked up in envNumbering.
lvlInfo <- lookupLevel numId lvl <$> asks envNumbering
-- Paragraph style computed from envParStyles and envNumbering.
parstyle <- elemToParagraphStyle ns element
<$> asks envParStyles
<*> asks envNumbering
-- Convert each child element and concatenate the resulting parts.
parparts <- mconcat <$> mapD (elemToParPart ns) (elChildren element)
case pHeading parstyle of
-- No heading style: build the result with mkListItem.
Nothing -> mkListItem parstyle numId lvl parparts
Just _ -> return $ Paragraph parstyle parparts
-- Heading style: keep numId, lvl and lvlInfo on the Heading value.
Just (parstylename, lev)
-> return $ Heading lev parstylename parstyle numId lvl lvlInfo parparts
elemToBodyPart ns element
| isElem ns "w" "p" element
, [Elem ppr] <- elContent element
Expand Down Expand Up @@ -836,8 +840,11 @@ elemToBodyPart ns element
case pHeading parstyle of
Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
mkListItem parstyle numId lvl parparts
_ -> return $ Paragraph parstyle parparts

Just (parstylename, lev) -> do
let (numId, lvl) = fromMaybe ("","") $ pNumInfo parstyle
lvlInfo <- lookupLevel numId lvl <$> asks envNumbering
return $ Heading lev parstylename parstyle numId lvl lvlInfo parparts
Nothing -> return $ Paragraph parstyle parparts
elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
let tblProperties = findChildByName ns "w" "tblPr" element
Expand Down

0 comments on commit 93d7457

Please sign in to comment.