Skip to content

Preserve attributes on HTML paragraphs #10850

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion MANUAL.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5374,7 +5374,7 @@ from being interpreted as Markdown.

### Extension: `native_divs` ###

Use native pandoc `Div` blocks for content inside `<div>` tags.
Use native pandoc `Div` blocks for content inside `<div>` tags.
When this extension is enabled, the HTML reader also wraps each
`<p>` element that has attributes in a `Div` block carrying those
attributes (marked with `wrapper="1"`), so that the attributes
are preserved during conversion.
For the most part this should give the same output as
`markdown_in_html_blocks`, but it makes it easier to write pandoc
filters to manipulate groups of blocks.
Expand Down
33 changes: 29 additions & 4 deletions src/Text/Pandoc/Readers/HTML.hs
Original file line number Diff line number Diff line change
Expand Up @@ -623,13 +623,38 @@ pPlain = do
then return mempty
else return $ B.plain contents

pPara :: PandocMonad m => TagParser m Blocks
pPara = do
-- | Helper for 'pPara' used when the @\<p\>@ tag carries attributes.
--
-- Requires the @native_divs@ extension (the parser fails otherwise).
-- The paragraph is wrapped in a 'Div' that receives the given identifier,
-- classes and key-value pairs, with a @("wrapper", "1")@ pair prepended.
-- If @empty_paragraphs@ is disabled and the paragraph has no content,
-- no blocks are produced.
pParaWithWrapper :: PandocMonad m => Attr -> TagParser m Blocks
pParaWithWrapper (ident, classes, kvs) = do
  guardEnabled Ext_native_divs
  inlines <- trimInlines <$> pInTags "p" inline
  let divAttr = (ident, classes, ("wrapper", "1") : kvs)
      wrapped = B.divWith divAttr (B.para inlines)
      -- Succeeds (yielding nothing) only for an empty paragraph when
      -- empty paragraphs are not being preserved.
      skipEmpty =
        mempty <$ (guardDisabled Ext_empty_paragraphs >> guard (null inlines))
  skipEmpty <|> return wrapped

-- Helper function for pPara when no significant attributes are present.
-- Reads the inline contents of a <p> element via pInTags and passes them
-- through trimInlines. If the empty_paragraphs extension is disabled and
-- the contents are empty, the result is mempty; otherwise the contents
-- are returned as a single paragraph (B.para).
pParaSimple :: PandocMonad m => TagParser m Blocks
pParaSimple = do
contents <- trimInlines <$> pInTags "p" inline
(do guardDisabled Ext_empty_paragraphs
guard (null contents)
return mempty)
<|> return (B.para contents)
-- NOTE(review): the two lines below repeat the two lines above with the
-- `<|>` moved to the end of the line; they look like the old and new sides
-- of a diff hunk shown together -- confirm which pair belongs in the file.
return mempty) <|>
return (B.para contents)

-- | Parse an HTML @\<p\>@ element.
--
-- The opening tag is inspected with 'lookAhead' (no input is consumed) and
-- its attributes are converted with 'toAttr'. If the tag has an identifier,
-- a class, or any key-value attribute, and the @native_divs@ extension
-- is enabled, parsing is delegated to 'pParaWithWrapper'. In every other
-- case, including attributes present but @native_divs@ disabled, the
-- element is parsed by 'pParaSimple'. Checking the extension here means a
-- paragraph with attributes is still read as a paragraph when the wrapping
-- behaviour is switched off, instead of making this parser fail.
pPara :: PandocMonad m => TagParser m Blocks
pPara = do
  TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "p" [])
  let attr@(ident, classes, kvs) = toAttr attr'
      -- "Significant" attributes are any id, class, or key-value pair.
      hasSignificantAttributes =
        not (T.null ident) || not (null classes) || not (null kvs)
  -- guardEnabled fails without consuming input, so this alternative simply
  -- turns the extension check into a Bool.
  nativeDivs <- (True <$ guardEnabled Ext_native_divs) <|> return False
  if hasSignificantAttributes && nativeDivs
    then pParaWithWrapper attr
    else pParaSimple

pFigure :: PandocMonad m => TagParser m Blocks
pFigure = do
Expand Down
7 changes: 7 additions & 0 deletions src/Text/Pandoc/Writers/HTML.hs
Original file line number Diff line number Diff line change
Expand Up @@ -750,6 +750,13 @@ blockToHtmlInner opts (Para lst) = do
blockToHtmlInner opts (LineBlock lns) = do
htmlLines <- inlineListToHtml opts $ intercalate [LineBreak] lns
return $ H.div ! A.class_ "line-block" $ htmlLines
-- A Div whose only child is a Para and whose key-value attributes contain
-- ("wrapper", "1") is written as a bare <p> element: the "wrapper" entries
-- are dropped and the Div's identifier, classes and remaining key-value
-- pairs are attached to the <p> itself.
blockToHtmlInner opts (Div (ident, classes, kvs) [Para pans])
  | lookup "wrapper" kvs == Just "1" = do
      paraHtml <- H.p <$> inlineListToHtml opts pans
      addAttrs opts (ident, classes, filter ((/= "wrapper") . fst) kvs) paraHtml
blockToHtmlInner opts (Div (ident, "section":dclasses, dkvs)
(Header level
hattr@(hident,hclasses,hkvs) ils : xs)) = do
Expand Down
38 changes: 38 additions & 0 deletions test/Tests/Readers/HTML.hs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,44 @@ tests = [ testGroup "base tag"
=?>
codeBlockWith ("c", [], []) "print('hi mom!')"
]
, testGroup "paragraph attributes"
[ test htmlNativeDivs "paragraph with id and class" $
"<p id=\"mypara\" class=\"important\">This is a paragraph.</p>" =?>
doc (divWith ("mypara", ["important"], [("wrapper", "1")]) (para (text "This is a paragraph.")))
, test htmlNativeDivs "paragraph with id only" $
"<p id=\"mypara\">This is a paragraph.</p>" =?>
doc (divWith ("mypara", [], [("wrapper", "1")]) (para (text "This is a paragraph.")))
, test htmlNativeDivs "paragraph with class only" $
"<p class=\"important\">This is a paragraph.</p>" =?>
doc (divWith ("", ["important"], [("wrapper", "1")]) (para (text "This is a paragraph.")))
, test htmlNativeDivs "paragraph with multiple classes" $
"<p class=\"important urgent\">This is a paragraph.</p>" =?>
doc (divWith ("", ["important", "urgent"], [("wrapper", "1")]) (para (text "This is a paragraph.")))
, test htmlNativeDivs "paragraph with key-value attributes" $
"<p data-foo=\"bar\">This is a paragraph.</p>" =?>
doc (divWith ("", [], [("wrapper", "1"), ("foo", "bar")]) (para (text "This is a paragraph.")))
, test htmlNativeDivs "paragraph without attributes" $
"<p>This is a normal paragraph.</p>" =?>
doc (para (text "This is a normal paragraph."))
, test htmlNativeDivs "paragraph with align only (center)" $
"<p align=\"center\">Aligned paragraph.</p>" =?>
doc (divWith ("", [], [("wrapper", "1"), ("align", "center")]) (para (text "Aligned paragraph.")))
, test htmlNativeDivs "paragraph with align only (right)" $
"<p align=\"right\">Aligned paragraph.</p>" =?>
doc (divWith ("", [], [("wrapper", "1"), ("align", "right")]) (para (text "Aligned paragraph.")))
, test htmlNativeDivs "paragraph with align and id" $
"<p id=\"foo\" align=\"left\">Aligned paragraph with id.</p>" =?>
doc (divWith ("foo", [], [("wrapper", "1"), ("align", "left")]) (para (text "Aligned paragraph with id.")))
, test htmlNativeDivs "paragraph with align and class" $
"<p class=\"bar\" align=\"justify\">Aligned paragraph with class.</p>" =?>
doc (divWith ("", ["bar"], [("wrapper", "1"), ("align", "justify")]) (para (text "Aligned paragraph with class.")))
, test htmlNativeDivs "paragraph with invalid align" $
"<p align=\"invalid\">Invalid align.</p>" =?>
doc (divWith ("", [], [("wrapper", "1"), ("align", "invalid")]) (para (text "Invalid align.")))
, test htmlNativeDivs "paragraph with invalid align and id" $
"<p id=\"baz\" align=\"invalid\">Invalid align with id.</p>" =?>
doc (divWith ("baz", [], [("wrapper", "1"), ("align", "invalid")]) (para (text "Invalid align with id.")))
]
, askOption $ \(QuickCheckTests numtests) ->
testProperty "Round trip" $
withMaxSuccess (if QuickCheckTests numtests == defaultValue
Expand Down
29 changes: 29 additions & 0 deletions test/Tests/Writers/HTML.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ htmlQTags = unpack
. purely (writeHtml4String def{ writerWrapText = WrapNone, writerHtmlQTags = True })
. toPandoc

-- | Render a value as an HTML5 string with text wrapping turned off
-- (all other writer options are left at their defaults).
html5 :: (ToPandoc a) => a -> String
html5 x = unpack (purely (writeHtml5String opts) (toPandoc x))
  where
    opts = def{ writerWrapText = WrapNone }

{-
"my test" =: X =?> Y

Expand Down Expand Up @@ -208,6 +211,32 @@ tests =
, "</div>"
]
]
, testGroup "paragraph attributes"
[ "paragraph with id and class" =:
divWith ("mypara", ["important"], [("wrapper", "1")]) (para (text "This is a paragraph."))
=?> "<p id=\"mypara\" class=\"important\">This is a paragraph.</p>"
, "paragraph with id only" =:
divWith ("mypara", [], [("wrapper", "1")]) (para (text "This is a paragraph."))
=?> "<p id=\"mypara\">This is a paragraph.</p>"
, "paragraph with class only" =:
divWith ("", ["important"], [("wrapper", "1")]) (para (text "This is a paragraph."))
=?> "<p class=\"important\">This is a paragraph.</p>"
, "paragraph with multiple classes" =:
divWith ("", ["important", "urgent"], [("wrapper", "1")]) (para (text "This is a paragraph."))
=?> "<p class=\"important urgent\">This is a paragraph.</p>"
, test html5 "paragraph with key-value attributes"
(divWith ("", [], [("wrapper", "1"), ("foo", "bar")]) (para (text "This is a paragraph."))
, "<p data-foo=\"bar\">This is a paragraph.</p>" :: String)
, "paragraph without wrapper attribute" =:
divWith ("mydiv", ["someclass"], []) (para (text "This is a div, not a p."))
=?> "<div id=\"mydiv\" class=\"someclass\">\n<p>This is a div, not a p.</p>\n</div>"
, "paragraph with wrapper and other attributes" =:
divWith ("mypara", ["important"], [("wrapper", "1"), ("data-value", "123")]) (para (text "This is a paragraph."))
=?> "<p id=\"mypara\" class=\"important\" data-value=\"123\">This is a paragraph.</p>"
, "paragraph with wrapper and align" =:
divWith ("mypara", [], [("wrapper", "1"), ("align", "center")]) (para (text "Aligned paragraph."))
=?> "<p id=\"mypara\" align=\"center\">Aligned paragraph.</p>"
]
]
where
tQ :: (ToString a, ToPandoc a)
Expand Down
68 changes: 68 additions & 0 deletions test/command/10768.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
```
% pandoc -f html -t native
<p id="test" class="foo bar" data-custom="value">This is a paragraph with attributes.</p>
^D
[ Div
( "test"
, [ "foo" , "bar" ]
, [ ( "wrapper" , "1" ) , ( "custom" , "value" ) ]
)
[ Para
[ Str "This"
, Space
, Str "is"
, Space
, Str "a"
, Space
, Str "paragraph"
, Space
, Str "with"
, Space
, Str "attributes."
]
]
]
```

```
% pandoc -f native -t html
[ Div
( "test"
, [ "foo" , "bar" ]
, [ ( "wrapper" , "1" ) , ( "custom" , "value" ) ]
)
[ Para
[ Str "This"
, Space
, Str "is"
, Space
, Str "a"
, Space
, Str "paragraph"
, Space
, Str "with"
, Space
, Str "attributes."
]
]
]
^D
<p id="test" class="foo bar" data-custom="value">This is a paragraph
with attributes.</p>
```

```
% pandoc -f html -t html
<p id="test" class="foo bar" data-custom="value">This is a paragraph with attributes.</p>
^D
<p id="test" class="foo bar" data-custom="value">This is a paragraph
with attributes.</p>
```

```
% pandoc -f html -t html5
<p id="test" class="foo bar" data-custom="value">This is a paragraph with attributes.</p>
^D
<p id="test" class="foo bar" data-custom="value">This is a paragraph
with attributes.</p>
```
Loading
Loading