Skip to content

Commit

Permalink
Fix bug 27 of #17340 (#19433)
Browse files Browse the repository at this point in the history
Fixes silent disappearance of Markdown (pseudo-)link when it's detected as
unsafe protocol. Now it will be converted to plain text in spirit of
[the specification](https://spec.commonmark.org/0.30/#links).
For that sake the check for protocol is added to rst.nim also.
  • Loading branch information
a-mr authored Feb 7, 2022
1 parent 4b0636f commit 801c0f0
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 40 deletions.
66 changes: 48 additions & 18 deletions lib/packages/docutils/rst.nim
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@

import
os, strutils, rstast, dochelpers, std/enumutils, algorithm, lists, sequtils,
std/private/miscdollars, tables
std/private/miscdollars, tables, strscans
from highlite import SourceLanguage, getSourceLanguage

type
Expand Down Expand Up @@ -1347,15 +1347,36 @@ proc match(p: RstParser, start: int, expr: string): bool =
inc i
result = true

proc fixupEmbeddedRef(n, a, b: PRstNode) =
proc safeProtocol*(linkStr: var string): string =
# Returns link's protocol and, if it's not safe, clears `linkStr`
result = ""
if scanf(linkStr, "$w:", result):
# if it has a protocol at all, ensure that it's not 'javascript:' or worse:
if cmpIgnoreCase(result, "http") == 0 or
cmpIgnoreCase(result, "https") == 0 or
cmpIgnoreCase(result, "ftp") == 0:
discard "it's fine"
else:
linkStr = ""

proc fixupEmbeddedRef(p: var RstParser, n, a, b: PRstNode): bool =
# Returns `true` if the link belongs to an allowed protocol
var sep = - 1
for i in countdown(n.len - 2, 0):
if n.sons[i].text == "<":
sep = i
break
var incr = if sep > 0 and n.sons[sep - 1].text[0] == ' ': 2 else: 1
for i in countup(0, sep - incr): a.add(n.sons[i])
for i in countup(sep + 1, n.len - 2): b.add(n.sons[i])
var linkStr = ""
for i in countup(sep + 1, n.len - 2): linkStr.add(n.sons[i].addNodes)
if linkStr != "":
let protocol = safeProtocol(linkStr)
result = linkStr != ""
if not result:
rstMessage(p, mwBrokenLink, protocol,
p.tok[p.idx-3].line, p.tok[p.idx-3].col)
b.add newLeaf(linkStr)

proc whichRole(p: RstParser, sym: string): RstNodeKind =
result = whichRoleAux(sym)
Expand Down Expand Up @@ -1407,14 +1428,17 @@ proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode =
if p.tok[p.idx-2].symbol == "`" and p.tok[p.idx-3].symbol == ">":
var a = newRstNode(rnInner)
var b = newRstNode(rnInner)
fixupEmbeddedRef(n, a, b)
if a.len == 0:
newKind = rnStandaloneHyperlink
newSons = @[b]
else:
newKind = rnHyperlink
newSons = @[a, b]
setRef(p, rstnodeToRefname(a), b, implicitHyperlinkAlias)
if fixupEmbeddedRef(p, n, a, b):
if a.len == 0: # e.g. `<a_named_relative_link>`_
newKind = rnStandaloneHyperlink
newSons = @[b]
else: # e.g. `link title <http://site>`_
newKind = rnHyperlink
newSons = @[a, b]
setRef(p, rstnodeToRefname(a), b, implicitHyperlinkAlias)
else: # include as plain text, not a link
newKind = rnInner
newSons = n.sons
result = newRstNode(newKind, newSons)
else: # some link that will be resolved in `resolveSubs`
newKind = rnRef
Expand Down Expand Up @@ -1623,7 +1647,6 @@ proc parseMarkdownCodeblock(p: var RstParser): PRstNode =
result.add(lb)

proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool =
result = true
var desc, link = ""
var i = p.idx

Expand All @@ -1642,14 +1665,21 @@ proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool =

parse("]", desc)
if p.tok[i].symbol != "(": return false
let linkIdx = i + 1
parse(")", link)
let child = newRstNode(rnHyperlink)
child.add desc
child.add link
# only commit if we detected no syntax error:
father.add child
p.idx = i
result = true
let protocol = safeProtocol(link)
if link == "":
result = false
rstMessage(p, mwBrokenLink, protocol,
p.tok[linkIdx].line, p.tok[linkIdx].col)
else:
let child = newRstNode(rnHyperlink)
child.add desc
child.add link
father.add child
p.idx = i
result = true

proc getFootnoteType(label: PRstNode): (FootnoteType, int) =
if label.sons.len >= 1 and label.sons[0].kind == rnLeaf and
Expand Down
17 changes: 3 additions & 14 deletions lib/packages/docutils/rstgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
## can be done by simply searching for [footnoteName].

import strutils, os, hashes, strtabs, rstast, rst, highlite, tables, sequtils,
algorithm, parseutils, std/strbasics, strscans
algorithm, parseutils, std/strbasics

import ../../std/private/since

Expand Down Expand Up @@ -826,17 +826,6 @@ proc renderOverline(d: PDoc, n: PRstNode, result: var string) =
"\\rstov$4[$5]{$3}$2\n", [$n.level,
rstnodeToRefname(n).idS, tmp, $chr(n.level - 1 + ord('A')), tocName])


proc safeProtocol(linkStr: var string) =
var protocol = ""
if scanf(linkStr, "$w:", protocol):
# if it has a protocol at all, ensure that it's not 'javascript:' or worse:
if cmpIgnoreCase(protocol, "http") == 0 or cmpIgnoreCase(protocol, "https") == 0 or
cmpIgnoreCase(protocol, "ftp") == 0:
discard "it's fine"
else:
linkStr = ""

proc renderTocEntry(d: PDoc, e: TocEntry, result: var string) =
dispA(d.target, result,
"<li><a class=\"reference\" id=\"$1_toc\" href=\"#$1\">$2</a></li>\n",
Expand Down Expand Up @@ -901,7 +890,7 @@ proc renderImage(d: PDoc, n: PRstNode, result: var string) =

# support for `:target:` links for images:
var target = esc(d.target, getFieldValue(n, "target").strip(), escMode=emUrl)
safeProtocol(target)
discard safeProtocol(target)

if target.len > 0:
# `htmlOut` needs to be of the following format for link to work for images:
Expand Down Expand Up @@ -1205,7 +1194,7 @@ proc renderHyperlink(d: PDoc, text, link: PRstNode, result: var string,
d.escMode = emUrl
renderRstToOut(d, link, linkStr)
d.escMode = mode
safeProtocol(linkStr)
discard safeProtocol(linkStr)
var textStr = ""
renderRstToOut(d, text, textStr)
let nimDocStr = if nimdoc: " nimdoc" else: ""
Expand Down
27 changes: 21 additions & 6 deletions tests/stdlib/trst.nim
Original file line number Diff line number Diff line change
Expand Up @@ -843,12 +843,7 @@ suite "Warnings":
rnInner
rnLeaf 'foo'
rnInner
rnLeaf '#'
rnLeaf 'foo'
rnLeaf ','
rnLeaf 'string'
rnLeaf ','
rnLeaf 'string'
rnLeaf '#foo,string,string'
rnParagraph anchor='foo'
rnLeaf 'Paragraph'
rnLeaf '.'
Expand Down Expand Up @@ -1256,3 +1251,23 @@ suite "RST inline markup":
rnLeaf 'my {link example'
rnLeaf 'http://example.com/bracket_(symbol_[)'
""")

test "not a Markdown link":
# bug #17340 (27) `f` will be considered as a protocol and blocked as unsafe
var warnings = new seq[string]
check("[T](f: var Foo)".toAst(warnings = warnings) ==
dedent"""
rnInner
rnLeaf '['
rnLeaf 'T'
rnLeaf ']'
rnLeaf '('
rnLeaf 'f'
rnLeaf ':'
rnLeaf ' '
rnLeaf 'var'
rnLeaf ' '
rnLeaf 'Foo'
rnLeaf ')'
""")
check(warnings[] == @["input(1, 5) Warning: broken link 'f'"])
11 changes: 9 additions & 2 deletions tests/stdlib/trstgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1593,8 +1593,15 @@ suite "invalid targets":
test "invalid links":
check("(([Nim](https://nim-lang.org/)))".toHtml ==
"""((<a class="reference external" href="https://nim-lang.org/">Nim</a>))""")
check("(([Nim](javascript://nim-lang.org/)))".toHtml ==
"""((<a class="reference external" href="">Nim</a>))""")
# unknown protocol is treated just like plain text, not a link
var warnings = new seq[string]
check("(([Nim](javascript://nim-lang.org/)))".toHtml(warnings=warnings) ==
"""(([Nim](javascript://nim-lang.org/)))""")
check(warnings[] == @["input(1, 9) Warning: broken link 'javascript'"])
warnings[].setLen 0
check("`Nim <javascript://nim-lang.org/>`_".toHtml(warnings=warnings) ==
"""Nim &lt;javascript://nim-lang.org/&gt;""")
check(warnings[] == @["input(1, 33) Warning: broken link 'javascript'"])

suite "local file inclusion":
test "cannot include files in sandboxed mode":
Expand Down

0 comments on commit 801c0f0

Please sign in to comment.