From 6384ae9688101d5f8583303a99f264ac5d19dbcb Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Thu, 18 Jan 2024 15:30:10 -0700 Subject: [PATCH] Fix parsing of link destinations that look like `code` or Fixes #136 This works by re-parsing the tokens that come after the link, but only when the end delimiter isn't on a chunk boundary (since that's the only way this problem can happen). Re-parsing a specific chunk won't work, because the part that needs re-interpreted can span more than one chunk. For example, we can draw the bounds of the erroneous code chunk in this example: [x](`) ^-----------^ If we re-parse the underlined part in isolation, we'll fix the first link, but won't find the HTML (since the closing angle bracket is in the next chunk). On the other hand, parsing links, code, and HTML in a single pass would make writing extensions more complicated. For example, LaTeX math is supposed to have the same binding strength as code spans: $first[$](about) ^------^ this is a math span, not a link [first]($)$5/8$ ^-^ this is an analogue of the original bug it shouldn't be a math span, but looks like one --- commonmark/src/Commonmark/Inlines.hs | 54 +++++++++++++++------------- commonmark/test/regression.md | 29 ++++++++++++++- 2 files changed, 58 insertions(+), 25 deletions(-) diff --git a/commonmark/src/Commonmark/Inlines.hs b/commonmark/src/Commonmark/Inlines.hs index fc292e3..6d6f61f 100644 --- a/commonmark/src/Commonmark/Inlines.hs +++ b/commonmark/src/Commonmark/Inlines.hs @@ -54,7 +54,7 @@ import Data.List (foldl') import Unicode.Char (isAscii, isAlpha) import qualified Data.IntMap.Strict as IntMap import qualified Data.Map.Strict as M -import Data.Maybe (isJust, mapMaybe, listToMaybe) +import Data.Maybe (isJust, mapMaybe, listToMaybe, maybeToList) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T @@ -73,18 +73,22 @@ mkInlineParser :: (Monad m, IsInline a) mkInlineParser bracketedSpecs formattingSpecs ilParsers attrParsers rm toks = do let iswhite t = hasType Spaces t || hasType LineEnd t let attrParser = choice attrParsers - let toks' = dropWhile iswhite . reverse . dropWhile iswhite . reverse $ toks - res <- {-# SCC parseChunks #-} evalStateT - (parseChunks bracketedSpecs formattingSpecs ilParsers - attrParser rm toks') defaultEnders + let go chunks toks' bottoms = do + chunks' <- {-# SCC parseChunks #-} evalStateT + (parseChunks bracketedSpecs formattingSpecs ilParsers + attrParser rm toks') defaultEnders + case chunks' of + Left err -> return $ Left err + Right chunks'' -> + case (processBrackets bracketedSpecs rm (chunks ++ chunks'') bottoms) of + Left st -> go ((reverse . befores . rightCursor) st) (mconcat (map chunkToks $ (maybeToList . center $ rightCursor st) ++ (afters $ rightCursor st))) (stackBottoms st) + Right chunks''' -> return $ Right chunks''' + res <- go [] ((dropWhile iswhite . reverse . dropWhile iswhite . reverse) toks) mempty return $! - case res of - Left err -> Left err - Right chunks -> - (Right . - unChunks . - processEmphasis . - processBrackets bracketedSpecs rm) chunks + case res of + Left err -> Left err + Right chunks -> + (Right . unChunks . processEmphasis) chunks defaultInlineParser :: (Monad m, IsInline a) => InlineParser m a defaultInlineParser = @@ -695,19 +699,19 @@ bracketMatchedCount :: [Chunk a] -> Int bracketMatchedCount chunksinside = sum $ map bracketChunkToNumber chunksinside processBrackets :: IsInline a - => [BracketedSpec a] -> ReferenceMap -> [Chunk a] -> [Chunk a] -processBrackets bracketedSpecs rm xs = + => [BracketedSpec a] -> ReferenceMap -> [Chunk a] -> M.Map Text SourcePos -> Either (DState a) [Chunk a] +processBrackets bracketedSpecs rm xs bottoms = case break (\case (Chunk Delim{ delimType = '[' } _ _) -> True _ -> False) xs of - (_,[]) -> xs + (_,[]) -> Right xs (ys,z:zs) -> let startcursor = Cursor (Just z) (reverse ys) zs in processBs bracketedSpecs DState{ leftCursor = startcursor , rightCursor = startcursor , refmap = rm - , stackBottoms = mempty + , stackBottoms = bottoms , absoluteBottom = chunkPos z } @@ -733,7 +737,7 @@ moveRight (Cursor (Just x) zs (y:ys)) = Cursor (Just y) (x:zs) ys {-# INLINE moveRight #-} processBs :: IsInline a - => [BracketedSpec a] -> DState a -> [Chunk a] + => [BracketedSpec a] -> DState a -> Either (DState a) [Chunk a] processBs bracketedSpecs st = let left = leftCursor st right = rightCursor st @@ -741,7 +745,7 @@ processBs bracketedSpecs st = bottom = absoluteBottom st -- trace (prettyCursors left right) $ return $! () in {-# SCC processBs #-} case (center left, center right) of - (_, Nothing) -> reverse $ + (_, Nothing) -> Right $ reverse $ case center (rightCursor st) of Nothing -> befores (rightCursor st) Just c -> c : befores (rightCursor st) @@ -828,8 +832,8 @@ processBs bracketedSpecs st = firstAfterTokPos = tokPos <$> listToMaybe (concatMap chunkToks afterchunks) -- in the event that newpos is not at the - -- beginning of a chunk, we need to add - -- some tokens from that chunk... + -- beginning of a chunk, we need to re-chunk + -- with those tokens and everything after them missingtoks = [t | t <- suffixToks , tokPos t >= newpos @@ -842,13 +846,12 @@ processBs bracketedSpecs st = (str (untokenize missingtoks)))) newpos missingtoks :) - in case addMissing afterchunks of - [] -> processBs bracketedSpecs - st{ rightCursor = Cursor Nothing + st' = case addMissing afterchunks of + [] -> st{ rightCursor = Cursor Nothing (eltchunk : befores left') [] } (y:ys) -> let lbs = befores left' - in processBs bracketedSpecs st{ + in st{ leftCursor = Cursor (Just eltchunk) lbs (y:ys) , rightCursor = fixSingleQuote $ @@ -862,6 +865,9 @@ processBs bracketedSpecs st = (chunkPos opener) $ stackBottoms st } + in if null missingtoks + then processBs bracketedSpecs st' + else Left st' -- Bracket matched count /= 0 -- -- Links § 6.3 ¶ 2 • 2 diff --git a/commonmark/test/regression.md b/commonmark/test/regression.md index 53ee8c0..238413b 100644 --- a/commonmark/test/regression.md +++ b/commonmark/test/regression.md @@ -4,7 +4,8 @@ Issue #113: EOL character weirdness on Windows (Important: first line ends with CR + CR + LF) ```````````````````````````````` example -line1 +line1 + line2 .

line1

@@ -307,3 +308,29 @@ Issue #133

zz

```````````````````````````````` + + +Issue #136 +```````````````````````````````` example +[link](`) `x` +. +

link x

+```````````````````````````````` + +```````````````````````````````` example +[link](`)[link](`) `x` +. +

linklink x

+```````````````````````````````` + +```````````````````````````````` example +[link]() `x` +. +

link">) x

+```````````````````````````````` + +```````````````````````````````` example +[![image]()![image]()](v) `x` +. +

image">)image">) x

+````````````````````````````````