From 8dad0611cc1baec9a2e60ac29e95113378c7ceed Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Sun, 21 Mar 2021 17:33:11 +0300 Subject: [PATCH 1/2] fix RST parsing after option lists --- lib/packages/docutils/rst.nim | 73 ++++++++++++++++++++++++++++++++++- tests/stdlib/trstgen.nim | 34 ++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index b5eef76105168..70d5b27d04916 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -82,6 +82,14 @@ ## * ***triple emphasis*** (bold and italic) using \*\*\* ## * ``:idx:`` role for \`interpreted text\` to include the link to this ## text into an index (example: `Nim index`_). +## * double slash `//` in option lists serves as a prefix for any option that +## starts with a word (without any leading symbols like `-`, `--`, `/`):: +## +## //compile compile the project +## //doc generate documentation +## +## Here the dummy `//` will disappear, while options ``compile`` +## and ``doc`` will be left in the final document. ## ## .. [cmp:Sphinx] similar but different from the directives of ## Python `Sphinx directives`_ extensions @@ -548,6 +556,67 @@ proc pushInd(p: var RstParser, ind: int) = proc popInd(p: var RstParser) = if p.indentStack.len > 1: setLen(p.indentStack, p.indentStack.len - 1) +# Working with indentation in rst.nim +# ----------------------------------- +# +# Every line break has an associated tkIndent. +# The tokenizer writes back the first column of next non-blank line +# in all preceding tkIndent tokens to the `ival` field of tkIndent. +# +# RST document is separated into body elements (B.E.), each of which +# has a dedicated handler proc (or block of logic when B.E. is a block quote) +# that should follow this rule: +# Every B.E. handler proc should finish at tkIndent (newline) +# after its B.E. finishes. +# Then its callers (which is `parseSection` or another B.E. 
handler) +# check for tkIndent ival (without necessity to advance `p.idx`) +# and decide themselves whether they continue processing or also stop. +# +# An example:: +# +# L RST text fragment indentation +# +--------------------+ +# 1 | | <- (empty line at the start of file) no tokens +# 2 |First paragraph. | <- tkIndent has ival=0, and next tkWord has col=0 +# 3 | | <- tkIndent has ival=0 +# 4 |* bullet item and | <- tkIndent has ival=0, and next tkPunct has col=0 +# 5 | its continuation | <- tkIndent has ival=2, and next tkWord has col=2 +# 6 | | <- tkIndent has ival=4 +# 7 | Block quote | <- tkIndent has ival=4, and next tkWord has col=4 +# 8 | | <- tkIndent has ival=0 +# 9 | | <- tkIndent has ival=0 +# 10|Final paragraph | <- tkIndent has ival=0, and tkWord has col=0 +# +--------------------+ +# C:01234 +# +# Here parser starts with initial `indentStack=[0]` and then calls the +# 1st `parseSection`: +# +# - `parseSection` calls `parseParagraph` and "First paragraph" is parsed +# - bullet list handler is started at reaching ``*`` (L4 C0), it +# starts bullet item logic (L4 C2), which calls `pushInd(p, ind=2)`, +# then calls `parseSection` (2nd call, nested) which parses +# paragraph "bullet item and its continuation" and then starts +# a block quote logic (L7 C4). +# The block quote logic calls `pushInd(p, ind=4)` and +# calls `parseSection` again, so a (simplified) sequence of calls now is:: +# +# parseSection -> parseBulletList -> +# parseSection (+block quote logic) -> parseSection +# +# 3rd `parseSection` finishes, block quote logic calls `popInd(p)`, +# it returns to bullet item logic, which sees that next tkIndent has +# ival=0 and stops there since the required indentation for a bullet item +# is 2 and 0<2; the bullet item logic calls `popInd(p)`. +# Then bullet list handler checks that next tkWord (L10 C0) has the +# right indentation but does not have ``*`` so stops at tkIndent (L10). 
+# - 1st `parseSection` invocation calls `parseParagraph` and the +# "Final paragraph" is parsed. +# +# If a B.E. handler has advanced `p.idx` past tkIndent to check +# whether it should continue its processing or not, and decided not to, +# then this B.E. handler should step back (e.g. do `dec p.idx`). + proc initParser(p: var RstParser, sharedState: PSharedState) = p.indentStack = @[0] p.tok = @[] @@ -1901,8 +1970,9 @@ proc parseBulletList(p: var RstParser): PRstNode = proc parseOptionList(p: var RstParser): PRstNode = result = newRstNodeA(p, rnOptionList) + let col = currentTok(p).col while true: - if isOptionList(p): + if currentTok(p).col == col and isOptionList(p): var a = newRstNode(rnOptionGroup) var b = newRstNode(rnDescription) var c = newRstNode(rnOptionListItem) @@ -1925,6 +1995,7 @@ proc parseOptionList(p: var RstParser): PRstNode = c.add(b) result.add(c) else: + dec p.idx # back to tkIndent break proc parseDefinitionList(p: var RstParser): PRstNode = diff --git a/tests/stdlib/trstgen.nim b/tests/stdlib/trstgen.nim index cf82cdf915b72..5c5e727ca2a99 100644 --- a/tests/stdlib/trstgen.nim +++ b/tests/stdlib/trstgen.nim @@ -1259,6 +1259,40 @@ Test1 let refline = "Ref. " & ref1 & "! and " & ref2 & ";and " & ref3 & "." 
doAssert refline in output1 + test "Option lists 1": + # check that "* b" is not consumed by previous bullet item because of + # incorrect indentation handling in option lists + let input = dedent """ + * a + -m desc + -n very long + desc + * b""" + let output = input.toHtml + check(output.count("") == 2) + check(output.count("-mdesc""" in output) + check("""-nvery long desc""" in + output) + + test "Option lists 2": + # check that 2nd option list is not united with the 1st + let input = dedent """ + * a + -m desc + -n very long + desc + -d option""" + let output = input.toHtml + check(output.count("-mdesc""" in output) + check("""-nvery long desc""" in + output) + check("""-doption""" in + output) + suite "RST/Code highlight": test "Basic Python code highlight": let pythonCode = """ From c298a6d9a2e48763c65b390a057ae45b6075c504 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Wed, 24 Mar 2021 20:53:38 +0300 Subject: [PATCH 2/2] add test for double / --- tests/stdlib/trstgen.nim | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/stdlib/trstgen.nim b/tests/stdlib/trstgen.nim index 5c5e727ca2a99..1cfde22dddaa6 100644 --- a/tests/stdlib/trstgen.nim +++ b/tests/stdlib/trstgen.nim @@ -1293,6 +1293,21 @@ Test1 check("""-doption""" in output) + test "Option list 3 (double /)": + let input = dedent """ + * a + //compile compile1 + //doc doc1 + cont + -d option""" + let output = input.toHtml + check(output.count("compilecompile1""" in output) + check("""docdoc1 cont""" in + output) + check("""-doption""" in + output) suite "RST/Code highlight": test "Basic Python code highlight": let pythonCode = """