Skip to content

Commit

Permalink
fix RST parsing after option lists (#17442)
Browse files Browse the repository at this point in the history
  • Loading branch information
a-mr authored Mar 25, 2021
1 parent 045400a commit 46364e6
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 1 deletion.
73 changes: 72 additions & 1 deletion lib/packages/docutils/rst.nim
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,14 @@
## * ***triple emphasis*** (bold and italic) using \*\*\*
## * ``:idx:`` role for \`interpreted text\` to include the link to this
## text into an index (example: `Nim index`_).
## * double slash `//` in option lists serves as a prefix for any option that
## starts with a word (without any leading symbols like `-`, `--`, `/`)::
##
## //compile compile the project
## //doc generate documentation
##
## Here the dummy `//` will disappear, while options ``compile``
## and ``doc`` will be left in the final document.
##
## .. [cmp:Sphinx] similar but different from the directives of
## Python `Sphinx directives`_ extensions
Expand Down Expand Up @@ -548,6 +556,67 @@ proc pushInd(p: var RstParser, ind: int) =
# Drops the innermost indentation level from `p.indentStack`.
# The stack is only shrunk while it holds more than one entry.
proc popInd(p: var RstParser) =
if p.indentStack.len > 1: setLen(p.indentStack, p.indentStack.len - 1)

# Working with indentation in rst.nim
# -----------------------------------
#
# Every line break has an associated tkIndent.
# The tokenizer writes back the first column of next non-blank line
# in all preceding tkIndent tokens to the `ival` field of tkIndent.
#
# An RST document is separated into body elements (B.E.), each of which
# has a dedicated handler proc (or block of logic when B.E. is a block quote)
# that should follow this rule:
# Every B.E. handler proc should finish at tkIndent (newline)
# after its B.E. finishes.
# Then its caller (which is `parseSection` or another B.E. handler)
# checks the tkIndent ival (without needing to advance `p.idx`)
# and decides for itself whether to continue processing or also stop.
#
# An example::
#
# L RST text fragment indentation
# +--------------------+
# 1 | | <- (empty line at the start of file) no tokens
# 2 |First paragraph. | <- tkIndent has ival=0, and next tkWord has col=0
# 3 | | <- tkIndent has ival=0
# 4 |* bullet item and | <- tkIndent has ival=0, and next tkPunct has col=0
# 5 | its continuation | <- tkIndent has ival=2, and next tkWord has col=2
# 6 | | <- tkIndent has ival=4
# 7 | Block quote | <- tkIndent has ival=4, and next tkWord has col=4
# 8 | | <- tkIndent has ival=0
# 9 | | <- tkIndent has ival=0
# 10|Final paragraph | <- tkIndent has ival=0, and tkWord has col=0
# +--------------------+
# C:01234
#
# Here parser starts with initial `indentStack=[0]` and then calls the
# 1st `parseSection`:
#
# - `parseSection` calls `parseParagraph` and "First paragraph" is parsed
# - bullet list handler is started at reaching ``*`` (L4 C0), it
# starts bullet item logic (L4 C2), which calls `pushInd(p, ind=2)`,
# then calls `parseSection` (2nd call, nested) which parses
# paragraph "bullet item and its continuation" and then starts
# a block quote logic (L7 C4).
# The block quote logic calls `pushInd(p, ind=4)` and
# calls `parseSection` again, so a (simplified) sequence of calls now is::
#
# parseSection -> parseBulletList ->
# parseSection (+block quote logic) -> parseSection
#
# 3rd `parseSection` finishes, block quote logic calls `popInd(p)`,
# it returns to bullet item logic, which sees that next tkIndent has
# ival=0 and stops there since the required indentation for a bullet item
# is 2 and 0<2; the bullet item logic calls `popInd(p)`.
# Then bullet list handler checks that next tkWord (L10 C0) has the
# right indentation but does not have ``*`` so stops at tkIndent (L10).
# - 1st `parseSection` invocation calls `parseParagraph` and the
# "Final paragraph" is parsed.
#
# If a B.E. handler has advanced `p.idx` past tkIndent to check
# whether it should continue its processing or not, and decided not to,
# then this B.E. handler should step back (e.g. do `dec p.idx`).

proc initParser(p: var RstParser, sharedState: PSharedState) =
p.indentStack = @[0]
p.tok = @[]
Expand Down Expand Up @@ -1912,8 +1981,9 @@ proc parseBulletList(p: var RstParser): PRstNode =

proc parseOptionList(p: var RstParser): PRstNode =
result = newRstNodeA(p, rnOptionList)
let col = currentTok(p).col
while true:
if isOptionList(p):
if currentTok(p).col == col and isOptionList(p):
var a = newRstNode(rnOptionGroup)
var b = newRstNode(rnDescription)
var c = newRstNode(rnOptionListItem)
Expand All @@ -1936,6 +2006,7 @@ proc parseOptionList(p: var RstParser): PRstNode =
c.add(b)
result.add(c)
else:
dec p.idx # back to tkIndent
break

proc parseDefinitionList(p: var RstParser): PRstNode =
Expand Down
49 changes: 49 additions & 0 deletions tests/stdlib/trstgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1277,6 +1277,55 @@ Test1
let refline = "Ref. " & ref1 & "! and " & ref2 & ";and " & ref3 & "."
doAssert refline in output1

test "Option lists 1":
# check that "* b" is not consumed by previous bullet item because of
# incorrect indentation handling in option lists
let input = dedent """
* a
-m desc
-n very long
desc
* b"""
let output = input.toHtml
# expected: a single bullet list with two items and one option table
check(output.count("<ul") == 1)
check(output.count("<li>") == 2)
check(output.count("<table") == 1)
check("""<th align="left">-m</th><td align="left">desc</td>""" in output)
# the continuation line "desc" must be joined to the `-n` description
check("""<th align="left">-n</th><td align="left">very long desc</td>""" in
output)

test "Option lists 2":
# check that 2nd option list is not united with the 1st
let input = dedent """
* a
-m desc
-n very long
desc
-d option"""
let output = input.toHtml
# expected: one bullet list and two separate option tables
check(output.count("<ul") == 1)
check(output.count("<table") == 2)
check("""<th align="left">-m</th><td align="left">desc</td>""" in output)
check("""<th align="left">-n</th><td align="left">very long desc</td>""" in
output)
check("""<th align="left">-d</th><td align="left">option</td>""" in
output)

test "Option list 3 (double /)":
# check that the dummy `//` prefix is dropped from the rendered option
# names and that the trailing `-d option` ends up in a second table
let input = dedent """
* a
//compile compile1
//doc doc1
cont
-d option"""
let output = input.toHtml
check(output.count("<ul") == 1)
check(output.count("<table") == 2)
# option names are expected without the leading `//`
check("""<th align="left">compile</th><td align="left">compile1</td>""" in output)
check("""<th align="left">doc</th><td align="left">doc1 cont</td>""" in
output)
check("""<th align="left">-d</th><td align="left">option</td>""" in
output)
suite "RST/Code highlight":
test "Basic Python code highlight":
let pythonCode = """
Expand Down

0 comments on commit 46364e6

Please sign in to comment.