Skip to content

Commit

Permalink
Specifically track the position where enders end
Browse files Browse the repository at this point in the history
Build upon a5f381e
to make it work correctly when the same type of item
(processing instruction, declaration, or CDATA section)
appears twice.
  • Loading branch information
notriddle authored and jgm committed Jan 26, 2024
1 parent d681657 commit 4f79ac4
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 12 deletions.
30 changes: 18 additions & 12 deletions commonmark/src/Commonmark/Tag.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ import Text.Parsec hiding (State)

data Enders =
Enders
{ scannedForCDATA :: !Bool
, scannedForProcessingInstruction :: !Bool
, scannedForDeclaration :: !Bool
{ scannedForCDATA :: !(Maybe SourcePos)
, scannedForProcessingInstruction :: !(Maybe SourcePos)
, scannedForDeclaration :: !(Maybe SourcePos)
} deriving Show

defaultEnders :: Enders
defaultEnders = Enders { scannedForCDATA = False
, scannedForProcessingInstruction = False
, scannedForDeclaration = False }
defaultEnders = Enders { scannedForCDATA = Nothing
, scannedForProcessingInstruction = Nothing
, scannedForDeclaration = Nothing }

(.&&.) :: (a -> Bool) -> (a -> Bool) -> (a -> Bool)
(.&&.) = liftM2 (&&)
Expand Down Expand Up @@ -165,12 +165,14 @@ htmlProcessingInstruction = try $ do
-- assume < has already been parsed
let questionmark = symbol '?'
op <- questionmark
pos <- getPosition
alreadyScanned <- lift $ gets scannedForProcessingInstruction
guard $ not alreadyScanned
guard $ maybe True (< pos) alreadyScanned
contents <- many $ satisfyTok (not . hasType (Symbol '?'))
<|> try (questionmark <*
notFollowedBy (symbol '>'))
lift $ modify $ \st -> st{ scannedForProcessingInstruction = True }
pos' <- getPosition
lift $ modify $ \st -> st{ scannedForProcessingInstruction = Just pos' }
cl <- sequence [ questionmark
, symbol '>' ]
return $ op : contents ++ cl
Expand All @@ -182,13 +184,15 @@ htmlDeclaration :: Monad m => ParsecT [Tok] s (StateT Enders m) [Tok]
htmlDeclaration = try $ do
-- assume < has already been parsed
op <- symbol '!'
pos <- getPosition
alreadyScanned <- lift $ gets scannedForDeclaration
guard $ not alreadyScanned
guard $ maybe True (< pos) alreadyScanned
let isDeclName t = not (T.null t) && T.all (isAscii .&&. isAlpha) t
name <- satisfyWord isDeclName
ws <- whitespace
contents <- many (satisfyTok (not . hasType (Symbol '>')))
lift $ modify $ \st -> st{ scannedForDeclaration = True }
pos' <- getPosition
lift $ modify $ \st -> st{ scannedForDeclaration = Just pos' }
cl <- symbol '>'
return $ op : name : ws ++ contents ++ [cl]

Expand All @@ -201,15 +205,17 @@ htmlCDATASection = try $ do
, symbol '['
, satisfyWord (== "CDATA")
, symbol '[' ]
pos <- getPosition
alreadyScanned <- lift $ gets scannedForCDATA
guard $ not alreadyScanned
guard $ maybe True (< pos) alreadyScanned
let ender = try $ sequence [ symbol ']'
, symbol ']'
, symbol '>' ]
contents <- many $ do
notFollowedBy ender
anyTok
lift $ modify $ \st -> st{ scannedForCDATA = True }
pos' <- getPosition
lift $ modify $ \st -> st{ scannedForCDATA = Just pos' }
cl <- ender
return $ op ++ contents ++ cl

Expand Down
29 changes: 29 additions & 0 deletions commonmark/test/regression.md
Original file line number Diff line number Diff line change
Expand Up @@ -307,3 +307,32 @@ Issue #133
<p>zz</p></li>
</ul>
````````````````````````````````


Issue #139
```````````````````````````````` example
Test <?xml?> <?xml?>
Test <?xml?> x <?xml?>
Test <![CDATA[ x ]]> <![CDATA[ x ]]>
Test <![CDATA[ x ]]> x <![CDATA[ x ]]>
Test <!DOCTYPE html> <!DOCTYPE html>
Test <!DOCTYPE html> x <!DOCTYPE html>
Test <span> <span>
Test <span> x <span>
.
<p>Test <?xml?> <?xml?></p>
<p>Test <?xml?> x <?xml?></p>
<p>Test <![CDATA[ x ]]> <![CDATA[ x ]]></p>
<p>Test <![CDATA[ x ]]> x <![CDATA[ x ]]></p>
<p>Test <!DOCTYPE html> <!DOCTYPE html></p>
<p>Test <!DOCTYPE html> x <!DOCTYPE html></p>
<p>Test <span> <span></p>
<p>Test <span> x <span></p>
````````````````````````````````
Expand Down

0 comments on commit 4f79ac4

Please sign in to comment.